sglang 0.3.3.post1__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. sglang/bench_latency.py +28 -10
  2. sglang/bench_server_latency.py +21 -10
  3. sglang/bench_serving.py +101 -7
  4. sglang/global_config.py +0 -1
  5. sglang/srt/layers/attention/__init__.py +27 -5
  6. sglang/srt/layers/attention/double_sparsity_backend.py +281 -0
  7. sglang/srt/layers/attention/flashinfer_backend.py +352 -83
  8. sglang/srt/layers/attention/triton_backend.py +6 -4
  9. sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py +772 -0
  10. sglang/srt/layers/attention/triton_ops/extend_attention.py +5 -3
  11. sglang/srt/layers/attention/triton_ops/prefill_attention.py +4 -2
  12. sglang/srt/layers/sampler.py +6 -2
  13. sglang/srt/managers/detokenizer_manager.py +31 -10
  14. sglang/srt/managers/io_struct.py +4 -0
  15. sglang/srt/managers/schedule_batch.py +120 -43
  16. sglang/srt/managers/schedule_policy.py +2 -1
  17. sglang/srt/managers/scheduler.py +202 -140
  18. sglang/srt/managers/tokenizer_manager.py +5 -1
  19. sglang/srt/managers/tp_worker.py +111 -1
  20. sglang/srt/mem_cache/chunk_cache.py +8 -4
  21. sglang/srt/mem_cache/memory_pool.py +77 -4
  22. sglang/srt/mem_cache/radix_cache.py +15 -7
  23. sglang/srt/model_executor/cuda_graph_runner.py +4 -4
  24. sglang/srt/model_executor/forward_batch_info.py +16 -21
  25. sglang/srt/model_executor/model_runner.py +60 -1
  26. sglang/srt/models/baichuan.py +2 -3
  27. sglang/srt/models/chatglm.py +5 -6
  28. sglang/srt/models/commandr.py +1 -2
  29. sglang/srt/models/dbrx.py +1 -2
  30. sglang/srt/models/deepseek.py +4 -5
  31. sglang/srt/models/deepseek_v2.py +5 -6
  32. sglang/srt/models/exaone.py +1 -2
  33. sglang/srt/models/gemma.py +2 -2
  34. sglang/srt/models/gemma2.py +5 -5
  35. sglang/srt/models/gpt_bigcode.py +5 -5
  36. sglang/srt/models/grok.py +1 -2
  37. sglang/srt/models/internlm2.py +1 -2
  38. sglang/srt/models/llama.py +1 -2
  39. sglang/srt/models/llama_classification.py +1 -2
  40. sglang/srt/models/llama_reward.py +2 -3
  41. sglang/srt/models/llava.py +4 -8
  42. sglang/srt/models/llavavid.py +1 -2
  43. sglang/srt/models/minicpm.py +1 -2
  44. sglang/srt/models/minicpm3.py +5 -6
  45. sglang/srt/models/mixtral.py +1 -2
  46. sglang/srt/models/mixtral_quant.py +1 -2
  47. sglang/srt/models/olmo.py +352 -0
  48. sglang/srt/models/olmoe.py +1 -2
  49. sglang/srt/models/qwen.py +1 -2
  50. sglang/srt/models/qwen2.py +1 -2
  51. sglang/srt/models/qwen2_moe.py +4 -5
  52. sglang/srt/models/stablelm.py +1 -2
  53. sglang/srt/models/torch_native_llama.py +1 -2
  54. sglang/srt/models/xverse.py +1 -2
  55. sglang/srt/models/xverse_moe.py +4 -5
  56. sglang/srt/models/yivl.py +1 -2
  57. sglang/srt/openai_api/adapter.py +92 -49
  58. sglang/srt/openai_api/protocol.py +10 -2
  59. sglang/srt/sampling/penaltylib/orchestrator.py +28 -9
  60. sglang/srt/sampling/sampling_batch_info.py +92 -58
  61. sglang/srt/sampling/sampling_params.py +2 -0
  62. sglang/srt/server.py +116 -17
  63. sglang/srt/server_args.py +121 -45
  64. sglang/srt/utils.py +11 -3
  65. sglang/test/few_shot_gsm8k.py +4 -1
  66. sglang/test/few_shot_gsm8k_engine.py +144 -0
  67. sglang/test/srt/sampling/penaltylib/utils.py +16 -12
  68. sglang/version.py +1 -1
  69. {sglang-0.3.3.post1.dist-info → sglang-0.3.4.dist-info}/METADATA +72 -29
  70. {sglang-0.3.3.post1.dist-info → sglang-0.3.4.dist-info}/RECORD +73 -70
  71. {sglang-0.3.3.post1.dist-info → sglang-0.3.4.dist-info}/WHEEL +1 -1
  72. sglang/srt/layers/attention/flashinfer_utils.py +0 -237
  73. {sglang-0.3.3.post1.dist-info → sglang-0.3.4.dist-info}/LICENSE +0 -0
  74. {sglang-0.3.3.post1.dist-info → sglang-0.3.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,144 @@
1
+ import argparse
2
+ import ast
3
+ import asyncio
4
+ import json
5
+ import re
6
+ import time
7
+
8
+ import numpy as np
9
+
10
+ import sglang as sgl
11
+ from sglang.api import set_default_backend
12
+ from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint
13
+ from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl
14
+
15
+ INVALID = -9999999
16
+
17
+
18
+ def get_one_example(lines, i, include_answer):
19
+ ret = "Question: " + lines[i]["question"] + "\nAnswer:"
20
+ if include_answer:
21
+ ret += " " + lines[i]["answer"]
22
+ return ret
23
+
24
+
25
+ def get_few_shot_examples(lines, k):
26
+ ret = ""
27
+ for i in range(k):
28
+ ret += get_one_example(lines, i, True) + "\n\n"
29
+ return ret
30
+
31
+
32
+ def get_answer_value(answer_str):
33
+ answer_str = answer_str.replace(",", "")
34
+ numbers = re.findall(r"\d+", answer_str)
35
+ if len(numbers) < 1:
36
+ return INVALID
37
+ try:
38
+ return ast.literal_eval(numbers[-1])
39
+ except SyntaxError:
40
+ return INVALID
41
+
42
+
43
+ async def concurrent_generate(engine, prompts, sampling_param):
44
+ tasks = []
45
+ for prompt in prompts:
46
+ tasks.append(asyncio.create_task(engine.async_generate(prompt, sampling_param)))
47
+
48
+ outputs = await asyncio.gather(*tasks)
49
+ return outputs
50
+
51
+
52
+ def run_eval(args):
53
+ # Select backend
54
+ engine = sgl.Engine(model_path=args.model_path, log_level="error")
55
+
56
+ if args.local_data_path is None:
57
+ # Read data
58
+ url = "https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl"
59
+ filename = download_and_cache_file(url)
60
+ else:
61
+ filename = args.local_data_path
62
+
63
+ lines = list(read_jsonl(filename))
64
+
65
+ # Construct prompts
66
+ num_questions = args.num_questions
67
+ num_shots = args.num_shots
68
+ few_shot_examples = get_few_shot_examples(lines, num_shots)
69
+
70
+ questions = []
71
+ labels = []
72
+ for i in range(len(lines[:num_questions])):
73
+ questions.append(get_one_example(lines, i, False))
74
+ labels.append(get_answer_value(lines[i]["answer"]))
75
+ assert all(l != INVALID for l in labels)
76
+ arguments = [{"question": q} for q in questions]
77
+
78
+ # construct the prompts
79
+ prompts = []
80
+ for i, arg in enumerate(arguments):
81
+ q = arg["question"]
82
+ prompt = few_shot_examples + q
83
+ prompts.append(prompt)
84
+
85
+ sampling_param = {
86
+ "stop": ["Question", "Assistant:", "<|separator|>"],
87
+ "max_new_tokens": 512,
88
+ "temperature": 0,
89
+ }
90
+
91
+ # Run requests
92
+ tic = time.time()
93
+
94
+ loop = asyncio.get_event_loop()
95
+
96
+ outputs = loop.run_until_complete(
97
+ concurrent_generate(engine, prompts, sampling_param)
98
+ )
99
+
100
+ # End requests
101
+ latency = time.time() - tic
102
+
103
+ # Shutdown the engine
104
+ engine.shutdown()
105
+
106
+ # Parse output
107
+ preds = []
108
+
109
+ for output in outputs:
110
+ preds.append(get_answer_value(output["text"]))
111
+
112
+ # Compute accuracy
113
+ acc = np.mean(np.array(preds) == np.array(labels))
114
+ invalid = np.mean(np.array(preds) == INVALID)
115
+
116
+ # Compute speed
117
+ num_output_tokens = sum(
118
+ output["meta_info"]["completion_tokens"] for output in outputs
119
+ )
120
+ output_throughput = num_output_tokens / latency
121
+
122
+ # Print results
123
+ print(f"Accuracy: {acc:.3f}")
124
+ print(f"Invalid: {invalid:.3f}")
125
+ print(f"Latency: {latency:.3f} s")
126
+ print(f"Output throughput: {output_throughput:.3f} token/s")
127
+
128
+ return {
129
+ "accuracy": acc,
130
+ "latency": latency,
131
+ "output_throughput": output_throughput,
132
+ }
133
+
134
+
135
+ if __name__ == "__main__":
136
+ parser = argparse.ArgumentParser()
137
+ parser.add_argument(
138
+ "--model-path", type=str, default="meta-llama/Meta-Llama-3.1-8B-Instruct"
139
+ )
140
+ parser.add_argument("--local-data-path", type=Optional[str], default=None)
141
+ parser.add_argument("--num-shots", type=int, default=5)
142
+ parser.add_argument("--num-questions", type=int, default=200)
143
+ args = parser.parse_args()
144
+ metrics = run_eval(args)
@@ -164,19 +164,20 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
164
164
  msg=f"key={key}\nactual={getattr(penalizer, key)}\nexpected={tensor}",
165
165
  )
166
166
 
167
- actual = orchestrator.apply(
168
- torch.ones(
169
- size=(len(case.test_subjects), self.vocab_size),
170
- dtype=torch.float32,
171
- device=self.device,
172
- )
167
+ original = torch.ones(
168
+ size=(len(case.test_subjects), self.vocab_size),
169
+ dtype=torch.float32,
170
+ device=self.device,
173
171
  )
172
+ actual = orchestrator.apply(original.clone())
174
173
  expected = torch.cat(
175
174
  tensors=[
176
175
  subject.steps[0].expected_logits
177
176
  for subject in case.test_subjects
178
177
  ],
179
178
  )
179
+ if actual is None:
180
+ actual = original
180
181
  torch.testing.assert_close(
181
182
  actual=actual,
182
183
  expected=expected,
@@ -226,6 +227,8 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
226
227
  device=self.device,
227
228
  )
228
229
  )
230
+ if actual_logits is None:
231
+ continue
229
232
  filtered_expected_logits = torch.cat(
230
233
  tensors=[
231
234
  subject.steps[0].expected_logits
@@ -317,19 +320,20 @@ class BaseBatchedPenalizerTest(unittest.TestCase):
317
320
  msg=f"key={key}\nactual={getattr(penalizer, key)}\nexpected={tensor}",
318
321
  )
319
322
 
320
- actual_logits = orchestrator.apply(
321
- torch.ones(
322
- size=(len(filtered_subjects), self.vocab_size),
323
- dtype=torch.float32,
324
- device=self.device,
325
- )
323
+ original = torch.ones(
324
+ size=(len(filtered_subjects), self.vocab_size),
325
+ dtype=torch.float32,
326
+ device=self.device,
326
327
  )
328
+ actual_logits = orchestrator.apply(original.clone())
327
329
  filtered_expected_logits = torch.cat(
328
330
  tensors=[
329
331
  subject.steps[i].expected_logits
330
332
  for subject in filtered_subjects
331
333
  ],
332
334
  )
335
+ if actual_logits is None:
336
+ actual_logits = original
333
337
  torch.testing.assert_close(
334
338
  actual=actual_logits,
335
339
  expected=filtered_expected_logits,
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.3.3.post1"
1
+ __version__ = "0.3.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.3.3.post1
3
+ Version: 0.3.4
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -219,36 +219,49 @@ Requires-Dist: sglang[srt]; extra == "all"
219
219
  Requires-Dist: sglang[openai]; extra == "all"
220
220
  Requires-Dist: sglang[anthropic]; extra == "all"
221
221
  Requires-Dist: sglang[litellm]; extra == "all"
222
+ Provides-Extra: all_xpu
223
+ Requires-Dist: sglang[srt_xpu]; extra == "all-xpu"
224
+ Requires-Dist: sglang[openai]; extra == "all-xpu"
225
+ Requires-Dist: sglang[anthropic]; extra == "all-xpu"
226
+ Requires-Dist: sglang[litellm]; extra == "all-xpu"
222
227
  Provides-Extra: anthropic
223
228
  Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
224
229
  Provides-Extra: dev
225
230
  Requires-Dist: sglang[all]; extra == "dev"
226
231
  Requires-Dist: sglang[test]; extra == "dev"
232
+ Provides-Extra: dev_xpu
233
+ Requires-Dist: sglang[all_xpu]; extra == "dev-xpu"
234
+ Requires-Dist: sglang[test]; extra == "dev-xpu"
227
235
  Provides-Extra: litellm
228
236
  Requires-Dist: litellm>=1.0.0; extra == "litellm"
229
237
  Provides-Extra: openai
230
238
  Requires-Dist: openai>=1.0; extra == "openai"
231
239
  Requires-Dist: tiktoken; extra == "openai"
240
+ Provides-Extra: runtime_common
241
+ Requires-Dist: aiohttp; extra == "runtime-common"
242
+ Requires-Dist: decord; extra == "runtime-common"
243
+ Requires-Dist: fastapi; extra == "runtime-common"
244
+ Requires-Dist: hf-transfer; extra == "runtime-common"
245
+ Requires-Dist: huggingface-hub; extra == "runtime-common"
246
+ Requires-Dist: interegular; extra == "runtime-common"
247
+ Requires-Dist: orjson; extra == "runtime-common"
248
+ Requires-Dist: packaging; extra == "runtime-common"
249
+ Requires-Dist: pillow; extra == "runtime-common"
250
+ Requires-Dist: psutil; extra == "runtime-common"
251
+ Requires-Dist: pydantic; extra == "runtime-common"
252
+ Requires-Dist: python-multipart; extra == "runtime-common"
253
+ Requires-Dist: torchao; extra == "runtime-common"
254
+ Requires-Dist: uvicorn; extra == "runtime-common"
255
+ Requires-Dist: uvloop; extra == "runtime-common"
256
+ Requires-Dist: zmq; extra == "runtime-common"
257
+ Requires-Dist: outlines>=0.0.44; extra == "runtime-common"
258
+ Requires-Dist: modelscope; extra == "runtime-common"
232
259
  Provides-Extra: srt
233
- Requires-Dist: aiohttp; extra == "srt"
234
- Requires-Dist: decord; extra == "srt"
235
- Requires-Dist: fastapi; extra == "srt"
236
- Requires-Dist: hf-transfer; extra == "srt"
237
- Requires-Dist: huggingface-hub; extra == "srt"
238
- Requires-Dist: interegular; extra == "srt"
239
- Requires-Dist: packaging; extra == "srt"
240
- Requires-Dist: pillow; extra == "srt"
241
- Requires-Dist: psutil; extra == "srt"
242
- Requires-Dist: pydantic; extra == "srt"
243
- Requires-Dist: python-multipart; extra == "srt"
260
+ Requires-Dist: sglang[runtime_common]; extra == "srt"
244
261
  Requires-Dist: torch; extra == "srt"
245
- Requires-Dist: torchao; extra == "srt"
246
- Requires-Dist: uvicorn; extra == "srt"
247
- Requires-Dist: uvloop; extra == "srt"
248
- Requires-Dist: zmq; extra == "srt"
249
262
  Requires-Dist: vllm==0.5.5; extra == "srt"
250
- Requires-Dist: outlines>=0.0.44; extra == "srt"
251
- Requires-Dist: modelscope; extra == "srt"
263
+ Provides-Extra: srt_xpu
264
+ Requires-Dist: sglang[runtime_common]; extra == "srt-xpu"
252
265
  Provides-Extra: test
253
266
  Requires-Dist: jsonlines; extra == "test"
254
267
  Requires-Dist: matplotlib; extra == "test"
@@ -270,14 +283,13 @@ Requires-Dist: peft; extra == "test"
270
283
 
271
284
  --------------------------------------------------------------------------------
272
285
 
273
- | [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/) | [**Paper**](https://arxiv.org/abs/2312.07104) | [**Slides**](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_dev_day_v2.pptx) | [**Join Slack**](https://join.slack.com/t/sgl-fru7574/shared_invite/zt-2ngly9muu-t37XiH87qvD~6rVBTkTEHw) | [**Join Bi-Weekly Development Meeting (Oct. 19)**](https://calendar.app.google/GYW7S8QGoanCuaxW6) |
274
-
275
- ## Upcoming Events
276
- - [Oct. 16, 2024] Online meetup for efficient LLM deployment and serving, co-hosted by SGLang, FlashInfer, and MLC LLM! Fill out the [Google form](https://forms.gle/B3YeedLxmrrhL1NM8) to receive the invite link.
286
+ | [**Blog**](https://lmsys.org/blog/2024-07-25-sglang-llama3/) | [**Paper**](https://arxiv.org/abs/2312.07104) | [**Slides**](https://github.com/sgl-project/sgl-learning-materials/blob/main/slides/amd_dev_day_v2.pdf) | [**Learn More**](https://github.com/sgl-project/sgl-learning-materials) | [**Join Slack**](https://join.slack.com/t/sgl-fru7574/shared_invite/zt-2ngly9muu-t37XiH87qvD~6rVBTkTEHw) |
287
+ [**Join Bi-Weekly Development Meeting (Oct. 19)**](https://calendar.app.google/GYW7S8QGoanCuaxW6) |
277
288
 
278
289
  ## News
279
- - [2024/09] 🔥 SGLang v0.3 Release: 7x Faster DeepSeek MLA, 1.5x Faster torch.compile, Multi-Image/Video LLaVA-OneVision ([blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)).
280
- - [2024/07] 🔥 Faster Llama3 Serving with SGLang Runtime (vs. TensorRT-LLM, vLLM) ([blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/)).
290
+ - [2024/10] 🔥 The First SGLang Online Meetup ([slides](https://github.com/sgl-project/sgl-learning-materials?tab=readme-ov-file#the-first-sglang-online-meetup)).
291
+ - [2024/09] SGLang v0.3 Release: 7x Faster DeepSeek MLA, 1.5x Faster torch.compile, Multi-Image/Video LLaVA-OneVision ([blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)).
292
+ - [2024/07] Faster Llama3 Serving with SGLang Runtime (vs. TensorRT-LLM, vLLM) ([blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/)).
281
293
  - [2024/02] SGLang enables **3x faster JSON decoding** with compressed finite state machine ([blog](https://lmsys.org/blog/2024-02-05-compressed-fsm/)).
282
294
 
283
295
  <details>
@@ -323,7 +335,7 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/
323
335
  ### Method 2: From source
324
336
  ```
325
337
  # Use the last release branch
326
- git clone -b v0.3.3.post1 https://github.com/sgl-project/sglang.git
338
+ git clone -b v0.3.4 https://github.com/sgl-project/sglang.git
327
339
  cd sglang
328
340
 
329
341
  pip install --upgrade pip
@@ -500,6 +512,40 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct
500
512
  python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1
501
513
  ```
502
514
 
515
+ ### Engine Without HTTP Server
516
+
517
+ We also provide an inference engine **without a HTTP server**. For example,
518
+
519
+ ```python
520
+ import sglang as sgl
521
+
522
+
523
+ def main():
524
+ prompts = [
525
+ "Hello, my name is",
526
+ "The president of the United States is",
527
+ "The capital of France is",
528
+ "The future of AI is",
529
+ ]
530
+ sampling_params = {"temperature": 0.8, "top_p": 0.95}
531
+ llm = sgl.Engine(model_path="meta-llama/Meta-Llama-3.1-8B-Instruct")
532
+
533
+ outputs = llm.generate(prompts, sampling_params)
534
+ for prompt, output in zip(prompts, outputs):
535
+ print("===============================")
536
+ print(f"Prompt: {prompt}\nGenerated text: {output['text']}")
537
+
538
+ if __name__ == "__main__":
539
+ main()
540
+ ```
541
+
542
+ This can be used for:
543
+
544
+ 1. **Offline Batch Inference**
545
+ 2. **Building Custom Servers**
546
+
547
+ You can view the full example [here](https://github.com/sgl-project/sglang/tree/main/examples/runtime/engine)
548
+
503
549
  ### Supported Models
504
550
 
505
551
  **Generative Models**
@@ -836,10 +882,7 @@ def chat_example(s):
836
882
  - The `regex` argument in `sgl.gen` is implemented through autoregressive decoding with logit bias masking, according to the constraints set by the regex. It is compatible with `temperature=0` and `temperature != 0`.
837
883
 
838
884
  ## Benchmark And Performance
839
- ![8b_throughput](https://lmsys.org/images/blog/sglang_llama3/8b_throughput.svg)
840
- ![70b_fp8_throughput](https://lmsys.org/images/blog/sglang_llama3/70b_fp8_throughput.svg)
841
-
842
- Learn more at this [blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/).
885
+ Learn more in our release blogs: [v0.2](https://lmsys.org/blog/2024-07-25-sglang-llama3/), [v0.3](https://lmsys.org/blog/2024-09-04-sglang-v0-3/).
843
886
 
844
887
  ## Roadmap
845
888
  [Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)
@@ -1,14 +1,14 @@
1
1
  sglang/__init__.py,sha256=b_pqO9bR2fjK9En_tigfzKTiQzE8b_hUizY0DAKVk1M,1616
2
2
  sglang/api.py,sha256=5x591S4rLbmNPs75qPwGKVu1sonVGDyjPAJlHTyWw50,6956
3
- sglang/bench_latency.py,sha256=gCS_nPNCf3sYR83jg6_KNadm-Xy7-I1V-UdZaoKFi8M,17889
4
- sglang/bench_server_latency.py,sha256=rRSDqjJ5jan9AzppOGx75KRUjZCU2dUG2h06CQOdJgk,5377
5
- sglang/bench_serving.py,sha256=1AQzkQ8ci9-rMZEM7wap8I09oPP4AZd93RfXMQRgVro,36386
3
+ sglang/bench_latency.py,sha256=RWSyZ-UhLV6dyPMMtK3nSOoNsjCY5xMpYKeUKRNtdcA,18276
4
+ sglang/bench_server_latency.py,sha256=2AMPwU2_85q-Btz9UdZC-TnZJPgXcNkydvFYWn2CJlU,5892
5
+ sglang/bench_serving.py,sha256=jcxNP7reIJPh3x1hG5TCM6wMlDXshjyMJUUjL2O7kzs,40060
6
6
  sglang/check_env.py,sha256=rGRABCgt-0SfUrow4px28b2P59aMn8eVTnN5eZc_a8s,5397
7
- sglang/global_config.py,sha256=38id86i3tRGCSOFZlN1LM01a3xt-V98xuNgKGG9boCk,1058
7
+ sglang/global_config.py,sha256=1r_W9rrBxGCCc2eqESRduOMMNq46e54xLgFLifHuQm0,1014
8
8
  sglang/launch_server.py,sha256=UnjNjYuZ8TtvmRtgYEsFImkbvCwvn_tQjk0V7cHy67E,450
9
9
  sglang/launch_server_llavavid.py,sha256=olPKyhozi1coCwoRMwBRYWsTFByrgus9CwPSeNmskgc,1002
10
10
  sglang/utils.py,sha256=NA_4xUrTI7KICQ3PEACfNWKE3nxSA5QvQZJNd4TQrDc,9395
11
- sglang/version.py,sha256=7Z8nSxbc04sgIKYqfKxkmSnG2nnSPT9dpM3RYiFOpUc,28
11
+ sglang/version.py,sha256=oYLGMpySamd16KLiaBTfRyrAS7_oyp-TOEHmzmeumwg,22
12
12
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  sglang/lang/chat_template.py,sha256=uqI_I9zIKXGXg7-W-yjqvx1ZeS_TuwFCms6wkmC2QmY,13411
14
14
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -26,9 +26,9 @@ sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bE
26
26
  sglang/srt/conversation.py,sha256=B4QPGOUVdoXRJwWWxSm5pfifGpuBs07fDTxJ1BHUXLw,20003
27
27
  sglang/srt/hf_transformers_utils.py,sha256=rt6flb6BoYTO8fw7AKCXmQLJx5XuSUuRmZX-VJHmuLQ,6064
28
28
  sglang/srt/mm_utils.py,sha256=zox644S3IHUWmADdK4MnIbdTS2DWHOy0_Dq0gCU38QQ,12273
29
- sglang/srt/server.py,sha256=hb27kEsOzQeVy5HuMVRJNOG8OsFwq0KZBVsZXB2qN1U,23267
30
- sglang/srt/server_args.py,sha256=_Y7YLlGYOWpR1mtfN8Hmi6hsldkp4q8vLhcNatHhsuQ,24374
31
- sglang/srt/utils.py,sha256=PYApZ2rFU67TnJaKkkjF9Z93jBPCJkotB0kk1vHGI6Y,22858
29
+ sglang/srt/server.py,sha256=65b39k4FN_TzL8qAimS1mRx8xdO8jmKCdUftOISUv7M,26809
30
+ sglang/srt/server_args.py,sha256=IDuX8ZCJd_6t2xHf7wNGskVgvpAQtUcFSGBnKFnLf3U,27290
31
+ sglang/srt/utils.py,sha256=0zalNeGrtrIyfmD7DHLRqocCY1_yNbPCD5hmionHpL0,23071
32
32
  sglang/srt/configs/__init__.py,sha256=292SuEorST-lAq2Uvsv2M7yC28uYZlssVvRDsF-bZCQ,86
33
33
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
34
34
  sglang/srt/configs/model_config.py,sha256=36My-o44trhWY3KYDeSFMGvv9XuUtIVI5e7F8VlOTWo,6723
@@ -42,15 +42,16 @@ sglang/srt/layers/linear.py,sha256=9rjCiSb_QOn5RgpVjIhEKdReRvSYVfcTSjbWBEbApLI,4
42
42
  sglang/srt/layers/logits_processor.py,sha256=Fq7VHwjP4iSzl_OBLo8qw_HVbIDbYB-0MGmfiD3Jk_E,12521
43
43
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
44
44
  sglang/srt/layers/radix_attention.py,sha256=i07VRXPDHj-zJ1TSrXEqCxumQwYSHwAvc8DoIg-Irtg,1964
45
- sglang/srt/layers/sampler.py,sha256=J5vd0CcLpLfgtLniCoe2VF6hjM_ld76hbDG4p1qoAMc,4010
45
+ sglang/srt/layers/sampler.py,sha256=23wRDw2Fs3wZfPBh6gFBz2vqwxnoDd9LAHWq7YdQWlc,4166
46
46
  sglang/srt/layers/torchao_utils.py,sha256=1nzZkSzbF4qCAMeBKAeeDpMl_mK8imiY2RL3xFEgvAw,3340
47
- sglang/srt/layers/attention/__init__.py,sha256=zLLwinbYLAQHfVEz0jZiVa_cYNgSYoy4wYD_0y-ErHQ,1798
48
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=DOvm-d3XLjE6XJDD3a8aCnlpuAJZZ946YFDH_Ec4lqc,10150
49
- sglang/srt/layers/attention/flashinfer_utils.py,sha256=9YMt7ab6F0gEVkxdVm8vDB0LVBRYRL0XIKVrmndp4n8,7571
50
- sglang/srt/layers/attention/triton_backend.py,sha256=I_kw0LXdgziHAFC8Qv5n5PDFJRLvZyzVsXwjmFZ0KSc,6041
47
+ sglang/srt/layers/attention/__init__.py,sha256=hyrPpnuiTs5VIZNyoIjZLRsHUX20gX2dvY9kkqyXIII,2158
48
+ sglang/srt/layers/attention/double_sparsity_backend.py,sha256=owzPwLWcShZ0ezkVjBr0vV73vtQIUh8z-rcQtXLG1fk,10374
49
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=y9saTqOhb_tJoRtjq9sishlQFGYlFkQD7QcV9x_ureo,19554
50
+ sglang/srt/layers/attention/triton_backend.py,sha256=B6YuIZWh4Zn85Q57i0z3eZ08RCrS0rfyPJzkRr_zOIQ,6150
51
51
  sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=XCQTX0kUttT1AG5FRMgfQbiXgvoempYD0UR2r6D_vJg,16711
52
- sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=oyqon1KG5-ICHcCANAbrglXLYKvWHFML-4tIQI9M5VI,11063
53
- sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=QkXPcT02c13zha2M4mBm2S5dh_sS-Gc4FkkrcywRqvc,5377
52
+ sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
53
+ sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=nEG7iBh1pAy3WaqPdLZwCJwDgyk5HLQ181kBS2nxbwg,11179
54
+ sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=bNHHZeEowwI1wwOWj2T6bjBTBtVZUbcL-0cgfZwpHek,5471
54
55
  sglang/srt/layers/fused_moe/__init__.py,sha256=bWCrDdOy2ANEXTb8CHYO63O3Iu3eZnn0PJbgl0z5vvE,75
55
56
  sglang/srt/layers/fused_moe/fused_moe.py,sha256=1WM2cObWXcFWtqh_utGJFPnrT344rORwuQ9hJDaH2s0,23104
56
57
  sglang/srt/layers/fused_moe/layer.py,sha256=raFyvPzjYz-Fv8B3IcOxQYKKCWqXis5mXwg1GFE61y4,22243
@@ -61,65 +62,67 @@ sglang/srt/lora/lora.py,sha256=a5j_Yy0s95msVPFgOuH5PCe7sMu0AyZFQ5wL0H-YIg8,14913
61
62
  sglang/srt/lora/lora_config.py,sha256=paVB7F7SIuxr_vodvKf8zzAlH2fdVYHhXxcXV62D0Vo,1411
62
63
  sglang/srt/lora/lora_manager.py,sha256=gzBwYXZEPYj56PkGTshTbWRfl_370wb6uTcRhDaLiF8,12801
63
64
  sglang/srt/managers/data_parallel_controller.py,sha256=GJGfX1-5DoQFZ-EMh_p02nvrOtrOc0UebnULWHhFrss,5765
64
- sglang/srt/managers/detokenizer_manager.py,sha256=iCLPdHkL6lAp_-Qew1u4Tyt3jYRkJ8i-Bj3l8TC-uaA,7278
65
+ sglang/srt/managers/detokenizer_manager.py,sha256=G2-Y-nDbq7LF8ZFWcXXcMkbCwzyBEh1g4UrciDlkNYY,7985
65
66
  sglang/srt/managers/image_processor.py,sha256=9Y9RqyLdbt4uOK7pnJCJIhY77791klskSrEg8U6pyS4,6910
66
- sglang/srt/managers/io_struct.py,sha256=PxeLOgRJR5raUXExmQHWAnvJZLU0BA_e591zthEOYAQ,12185
67
- sglang/srt/managers/schedule_batch.py,sha256=rev0x6tp2ex8uW4PPrcfJ6m6WgfhsNLpuPNWForYcGE,31363
68
- sglang/srt/managers/schedule_policy.py,sha256=PiTKvsAFwoNWNsv_SFkghIHCL452MdboRc2cmN6ITcU,11935
69
- sglang/srt/managers/scheduler.py,sha256=3eJjVZgLDyZWjniQf1Mkv1-1rbQyaOjyM0cRG-bNHAw,40625
70
- sglang/srt/managers/tokenizer_manager.py,sha256=AI1yfV5A1cpWDDuoelRx55lKMgNWccNOvyDFlKLddFA,24794
71
- sglang/srt/managers/tp_worker.py,sha256=fcaW-u7AAX49kQCNn_AEtdRPykRdT6Z6lx1O9LHA15E,4833
67
+ sglang/srt/managers/io_struct.py,sha256=QJ1Eu7XNsRH35ec5nUOUS2XSjiFWyjYHKsnMqviC_Mk,12298
68
+ sglang/srt/managers/schedule_batch.py,sha256=6LUXbAd6PvHIDk6iwpsufosahIT2z32JZjsNx5xeKcg,33930
69
+ sglang/srt/managers/schedule_policy.py,sha256=unDmK7Y_Ti0Eiizh3_iEFMsC1KDqGMTqU8MlQgg-6qo,11951
70
+ sglang/srt/managers/scheduler.py,sha256=Y7R-VkLt8Az2jZGrGRuhG1g4UPO5y-7b9BaOknuC2aI,43019
71
+ sglang/srt/managers/tokenizer_manager.py,sha256=SprHC0Bs8kvtnYboDPrH587uO_sdKHyp7tVBCdbEB9c,25066
72
+ sglang/srt/managers/tp_worker.py,sha256=nwkIXiasGA4w97pnMG32U1DN1RlLFkvETvl9q7SjGeY,8887
72
73
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=qEQwEkG4E5rab2ZoTqcesf5pR_J4nV2jBxIHsBJHtIM,924
73
- sglang/srt/mem_cache/chunk_cache.py,sha256=CjZZYlqQzq7mYOiBMLWA5XNb6HIyh5lIMdY-K0OUZEc,2368
74
+ sglang/srt/mem_cache/chunk_cache.py,sha256=VcCpyrf5FOQ5xoKeOouCI5ZQLkZo_pgY1SPbDDkagGg,2492
74
75
  sglang/srt/mem_cache/flush_cache.py,sha256=pTLKPRB17U6vl5RFJJvuJ4jCL2SyomgkUBNlkDpGRqo,978
75
- sglang/srt/mem_cache/memory_pool.py,sha256=L-5drUt7vlyvple4OcjH1jJRzt2qhVrpc9klZn-bQfE,7125
76
- sglang/srt/mem_cache/radix_cache.py,sha256=00bghOihUm7lA1i4gxxMYQLept9LaHg2ZSXZryuFZZI,10121
77
- sglang/srt/model_executor/cuda_graph_runner.py,sha256=iheZYErwFT_W4kJUE1dgbGoQQx7hyOSKa-Yv8guq0DI,10479
78
- sglang/srt/model_executor/forward_batch_info.py,sha256=FIQ8XIIP724mIL2l7w7mSEFH452qw-TPpqm43J4YeHM,5822
79
- sglang/srt/model_executor/model_runner.py,sha256=Qxp6VyL-yiDzaTQuAQPTRrHiqsZAT0ki94teubxbocc,23237
80
- sglang/srt/models/baichuan.py,sha256=50m43kIVo-YamHFwxyiLGG_pCbF7mzUJfhEyuuSmVC8,15100
81
- sglang/srt/models/chatglm.py,sha256=XaS_6-ZvRw7X-56sk9xQogqT0NzGEMVpiAdQnC5qbBY,13333
82
- sglang/srt/models/commandr.py,sha256=2urK7u2FiwPBl60hMmt-wfaJ8V-ilv6l1B37MUlvSxk,14121
83
- sglang/srt/models/dbrx.py,sha256=qTpyA1Iv56VI-ksPKt4JryX2Pn7T5FXAa0n0ZoT4qbw,14615
84
- sglang/srt/models/deepseek.py,sha256=4sl4YYoxqe-vif7KJKcMjMA3KgvzYHqpQBgM58lzLHc,15973
85
- sglang/srt/models/deepseek_v2.py,sha256=dt0FGAgW3jd7OJJnKfH-LIU13U0I9b7R9shYmAEins4,28390
86
- sglang/srt/models/exaone.py,sha256=9JfFhYbpcHMXIaBNn8rc_GOlkItkIgbGNslNyFD7gvU,13054
87
- sglang/srt/models/gemma.py,sha256=gui46inEJsrmppEMTUIQuzMxGPEBx_TjiZ5-PacjuSk,12240
88
- sglang/srt/models/gemma2.py,sha256=V0GjEdTqxyXvBqjgyiyONipohjOqw0pLITmZZRb2kIE,14890
89
- sglang/srt/models/gpt_bigcode.py,sha256=LgSm-8oxBfnzMAC4Jqqg-RJGge4E_wgJ1br7ylbTPZ0,10162
90
- sglang/srt/models/grok.py,sha256=lUR_SmD_KhIiZx5OVUPZp8VVdrAga6WWTdMKJ5PCFbw,14896
91
- sglang/srt/models/internlm2.py,sha256=4SUaeJl2dZlUowahfv7kLbz3jLXtmvdBPGURmhAeX6Q,12169
92
- sglang/srt/models/llama.py,sha256=5j66LmvFhOKgFZiE75mJ80XBjZ2dNx7e8Yea5lsD0P0,15828
93
- sglang/srt/models/llama_classification.py,sha256=Yhabu9FuBxjNo74crMsK0FqpD53ehOx_zcHgIXjvlvQ,3379
76
+ sglang/srt/mem_cache/memory_pool.py,sha256=ihVZXlJ_Fvs1L2c2SZQaijUYSn9X6eyiFiG2NNRQS_M,9297
77
+ sglang/srt/mem_cache/radix_cache.py,sha256=cS6G5uOW_0QICH30PXxatetka4wnELfhP4czHn8RDJE,10414
78
+ sglang/srt/model_executor/cuda_graph_runner.py,sha256=KgSBvoF2IyCGDFNXQyN7sV3E_S2NndeMQyaQZB97Pak,10499
79
+ sglang/srt/model_executor/forward_batch_info.py,sha256=rSazAtkWKyc2g2QILT2-AsUdaBt51hQBU1qcS2iw_Nw,5690
80
+ sglang/srt/model_executor/model_runner.py,sha256=hn-VeLABZL4y_GcJFl2y_C7x6ZS_Xw0tDyKzOxYMtVQ,25983
81
+ sglang/srt/models/baichuan.py,sha256=uV20fr7SqlATxoziXRtJyXFnZZoWTUy3qvQNFaEvw8M,15014
82
+ sglang/srt/models/chatglm.py,sha256=uep4Wy_2jwn_x6Cvagt5rs3JRY_AlcM-VXvRTCFC5Tc,13172
83
+ sglang/srt/models/commandr.py,sha256=WIMwjV3C0pRbVs4Xv9tqnHGreRvWC7zsML2hNuXw4A0,14060
84
+ sglang/srt/models/dbrx.py,sha256=_DshXyXr_xVB7wtE28PFcb6KDIkA6gygkXYKqivSCFc,14554
85
+ sglang/srt/models/deepseek.py,sha256=W342tVpEpkc_fvO_DTP4fX3EGF-DIFC4QOySdUGzl9w,15837
86
+ sglang/srt/models/deepseek_v2.py,sha256=5P5678aaMT4iO4jS0dZWUiRG4o9EE98xVgs6Zjy-Mr0,28229
87
+ sglang/srt/models/exaone.py,sha256=bIXdAXoWlCdfDdX2q47Br3QOa3jEYiiP2Hdd1T4crnM,12993
88
+ sglang/srt/models/gemma.py,sha256=4MVHwc5Jc4CSg3HIdNJEBYk8mhspjuwvc_6Oi8Cd-g8,12202
89
+ sglang/srt/models/gemma2.py,sha256=3VL223T_3syBG3fUInbtFaXvIs7dYjtsfX3OfDQc7m4,14777
90
+ sglang/srt/models/gpt_bigcode.py,sha256=q9N13Js2v0VheudWssRoSjnptS6TSf7DOmC8zLRGxeo,10049
91
+ sglang/srt/models/grok.py,sha256=vc7-E_hemNKaNORxg4rmaQcVYlpoavyaAZUG9B2dgbY,14835
92
+ sglang/srt/models/internlm2.py,sha256=-liQB13sgR3GnXJacBSMuEbLa2N4tICx0LsNgu_nNvU,12108
93
+ sglang/srt/models/llama.py,sha256=a43Y5mvMDmFcRcPL78vsAElaOvTqPajLPB2_BDwJ7pM,15767
94
+ sglang/srt/models/llama_classification.py,sha256=WcHYFez7qloTCpXLy1A6-dBGHWp22ebv6yG68jFVBjc,3318
94
95
  sglang/srt/models/llama_embedding.py,sha256=4j3WNLB-x7XQnJvohdRs7VSSEabbhiE2BRHmnG5IZRU,3453
95
- sglang/srt/models/llama_reward.py,sha256=qQOPfn-9oqhsD0EaffXtk-EXKRdSZL1X7CYAGCDoG9A,5383
96
- sglang/srt/models/llava.py,sha256=zbJs1P4_Bjh2_dSbyoheJZ1wGXuKHGz6BpV766G7ZUY,25094
97
- sglang/srt/models/llavavid.py,sha256=qhBGHTxzGAOMgqMiwOc3mUbaK6qeXsEYSlNmlEEIdeM,12198
98
- sglang/srt/models/minicpm.py,sha256=5vc-Lq7ggHrRxxkciVMdZ5Vq6ThLwnhFS62UCokFC2g,13792
99
- sglang/srt/models/minicpm3.py,sha256=hhhgZTKQApUZpH_MYQZTk3K1Ox-xpJRxGCemoUw8x4U,25184
96
+ sglang/srt/models/llama_reward.py,sha256=ag3eVdP38iURj81fTCa-sC2jV_eCkTIjXUQf1I96fCI,5297
97
+ sglang/srt/models/llava.py,sha256=ny3sK2sgYwrEhawSAc1tZeltcgukphSTdxsqyq-Epkc,24857
98
+ sglang/srt/models/llavavid.py,sha256=ztS5He-NF4fmfujdoMnKljOG1fNfPvp-6bduT7B6EMU,12137
99
+ sglang/srt/models/minicpm.py,sha256=LpUdxKA27z79DSYAPPlfCgI4GEnWCYznhgSQl-QCsTY,13731
100
+ sglang/srt/models/minicpm3.py,sha256=-fLZ-RRbR2jLGSsatBWV-qsSNIZCPbS_jasmrOlUdK8,25023
100
101
  sglang/srt/models/mistral.py,sha256=tiYoKjyYVzlQl52QUZ33odD2yCxj9dxcqln474VuZOw,744
101
- sglang/srt/models/mixtral.py,sha256=BonqX_rSB_UuBDQe3uy8-NOxB4Q4s2mTxTQItvFB9ZQ,13864
102
- sglang/srt/models/mixtral_quant.py,sha256=SAHBIiD5O1TnojCpqTLcPy3TEvfSCKeOe3GC47fdFSg,14039
103
- sglang/srt/models/olmoe.py,sha256=ghhNpZe4SzaZEpw0APYBbAmLb3LBagRC2N724RkOkH4,15312
104
- sglang/srt/models/qwen.py,sha256=IrOKHS7b4SL2fnJegq811eeHnAQDya2PujIgKQ9URVY,9921
105
- sglang/srt/models/qwen2.py,sha256=B7hXnW5uYPmpMgSN7tI3tTvMEmmQLpddsw_iNTiaHJI,12398
106
- sglang/srt/models/qwen2_moe.py,sha256=MK-9W6FJhXoQYayg_jpXjKKq4n5j3s2b2ZaoCBfVJ2I,17120
107
- sglang/srt/models/stablelm.py,sha256=ldtlRG1XGdYcjwqb48dpMTfbdh8KHUjcWrrUYNJ0MEk,11326
108
- sglang/srt/models/torch_native_llama.py,sha256=c5GJ_k9zbSOk0PjLCXAK8YebGEy0RUVYZ9_h6_19A3M,19215
109
- sglang/srt/models/xverse.py,sha256=i11wEKqqVCoVtH7yo9jfpNyGHxhw7NvTPid3ojmg79s,13634
110
- sglang/srt/models/xverse_moe.py,sha256=JwkBhsyusP7e_hAMnomkP8cEmKNCLJPRtwaTERQ0D0M,15818
111
- sglang/srt/models/yivl.py,sha256=N3noJ5M-FiZS-E_zfaJs4prQOu_ineRt11MWloYgOR8,4826
112
- sglang/srt/openai_api/adapter.py,sha256=bQ2lZGEQGAUkITXshdnCPzx6JN9iqYVvIpfD7uO5rN4,51519
113
- sglang/srt/openai_api/protocol.py,sha256=rdSwUAoO5-KLemJOE50xwSUagxY4T1QIiNyCYsTtCi0,9868
114
- sglang/srt/sampling/sampling_batch_info.py,sha256=ec5TMw47q2OCrkp2QwN45Ss1RZ-QYv7-KuGFKyGuvsg,6686
115
- sglang/srt/sampling/sampling_params.py,sha256=Xwh4_M6PP4SWyGV-zNyIhp4XbRKbeU4251ao8UOlZlI,5704
102
+ sglang/srt/models/mixtral.py,sha256=UUqzpOBXsObirmpJz4xstlG82uu4JfXsh-gWQmiKbW0,13803
103
+ sglang/srt/models/mixtral_quant.py,sha256=HPipVG_Gc5Ki0YXg49Rwn2_uvtCCI1IxlA7mVRVFivw,13978
104
+ sglang/srt/models/olmo.py,sha256=lD4VewXK0rVqhttGkOOzaxoqRQgVfV90s8ElStPBBdE,11896
105
+ sglang/srt/models/olmoe.py,sha256=3qHnY1DWBhyx9FWGJGb3a8kewcmEdYZOkYZ1JBx1LWs,15251
106
+ sglang/srt/models/qwen.py,sha256=mjGqo3NkTYfJ2qqztFw8mjKggPT2moW15nQgrq3GxWk,9860
107
+ sglang/srt/models/qwen2.py,sha256=I2ZzH9pVTZdjP1fHlq1qdG4JiWHt1CC6t1EK2gN5Ppc,12337
108
+ sglang/srt/models/qwen2_moe.py,sha256=BaNq8xgZKqjr_fcEBtH4yjBSc3-p4VztPiknVwllcQk,16984
109
+ sglang/srt/models/stablelm.py,sha256=0NWUVsYGhbc_X2eT9x38MaaUhZGmFtMgw_2PBv25Yxw,11265
110
+ sglang/srt/models/torch_native_llama.py,sha256=dtasdhwfRPE1eOcAIFUBsHrDnkjegXvo8WhGlqvXGKk,19154
111
+ sglang/srt/models/xverse.py,sha256=v4OaFdss9oD5YNzXsnjoXE9ffCkXL9U5o0OWLm1vHQQ,13573
112
+ sglang/srt/models/xverse_moe.py,sha256=A8EB82NpozoBplp7Qd8B_kY_3cL-UMydAxYIrhACVPE,15682
113
+ sglang/srt/models/yivl.py,sha256=xcWqkuZ29FmBBJY6aKetwItWIPl-kfXK-QmgdLONles,4765
114
+ sglang/srt/openai_api/adapter.py,sha256=WkYCKVaYTkFdLrySBhlkDyHJVaaHMF7KrhNnmw3L3us,53534
115
+ sglang/srt/openai_api/protocol.py,sha256=EZ6G209rBEDP7cepO2kAYqE8wMe1ksYdN7to1iT97Lw,10248
116
+ sglang/srt/sampling/sampling_batch_info.py,sha256=EAdep3I5qmbDDQJ0Ktrq0ySXJ6DCrTAjniEwFu4ZRqE,7679
117
+ sglang/srt/sampling/sampling_params.py,sha256=ZPHCQq7Bi4P_sxUzdKgYVXZpB_tC-kA7rlLwiW9Ct9A,5781
116
118
  sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
117
- sglang/srt/sampling/penaltylib/orchestrator.py,sha256=WkTNeDhj9H9rtp2ZZeX6MS2sdKSGlLboE6FcuKrwUo0,10815
119
+ sglang/srt/sampling/penaltylib/orchestrator.py,sha256=kizcPnxtRawmDt6utRuhbk4yfNs5H5mx1DAlDVEZRv8,11328
118
120
  sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=IvYioX53Vq_ji-0Zhcz_r5mUa3T3GaIydVS6K4FhWfE,2557
119
121
  sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=XJZP0C4NFyXgcODbIWXxrgVEjmRgqLdZuVAtoN-LveY,3565
120
122
  sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=0PlANTrR959foTA3Nj5qBE7ndaOZgG-9X6LhzlmEUc8,2533
121
123
  sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=v9jOgA0-I31WcrhIydiFbpy2ZJPLytFLGM98NRPd2sU,2820
122
- sglang/test/few_shot_gsm8k.py,sha256=To7Sdg-DLF8poIQLwiOBYKbkz-1C_gn6H79vIbyPR-o,3860
124
+ sglang/test/few_shot_gsm8k.py,sha256=ll-gNbcv829IwSPXAZt4JIEIu8IR3APCLcX3BHOFVp8,3968
125
+ sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
123
126
  sglang/test/run_eval.py,sha256=NWxeLWmInBgkCvC9Jr_QzF7GfAiBve3Gf1JQrEOlNlU,3899
124
127
  sglang/test/runners.py,sha256=VCmtH08FsAq_JTAKfKo0zB4o-osNMAxxwe4aKcSxr4c,13515
125
128
  sglang/test/simple_eval_common.py,sha256=r0G-9QLycs2ax3RMc44T_61fzMxlpTzv6pececC7lyY,12379
@@ -132,9 +135,9 @@ sglang/test/test_activation.py,sha256=jkdNRzJnbd5OgZliQaIXpxovlcky17UrweomcOcMxo
132
135
  sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c,3720
133
136
  sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
134
137
  sglang/test/test_utils.py,sha256=NkJuezjmonjgC3_i_CTBd8KSqWh6W9CLcgoaqvTNK2U,18684
135
- sglang/test/srt/sampling/penaltylib/utils.py,sha256=-0p0rV-P4lNo7xAe3rQSBHTubc50a-DFyOQmLGAkgkQ,12515
136
- sglang-0.3.3.post1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
137
- sglang-0.3.3.post1.dist-info/METADATA,sha256=xfzfAtRkt_PcB8Lw34-Jckq-iukmhDnhu-_8e9SZ3_Y,39186
138
- sglang-0.3.3.post1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
139
- sglang-0.3.3.post1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
140
- sglang-0.3.3.post1.dist-info/RECORD,,
138
+ sglang/test/srt/sampling/penaltylib/utils.py,sha256=Koe8GYoxIBUCz71of0oHhM5t5QcEd6a1IYq5SszRFAw,12730
139
+ sglang-0.3.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
140
+ sglang-0.3.4.dist-info/METADATA,sha256=rrkwX2teVdp79NEuOJfTBPUYCs_72LHIabuIesToPdI,40738
141
+ sglang-0.3.4.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
142
+ sglang-0.3.4.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
143
+ sglang-0.3.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5