sglang 0.4.1__py3-none-any.whl → 0.4.1.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. sglang/bench_offline_throughput.py +1 -0
  2. sglang/bench_serving.py +11 -3
  3. sglang/lang/backend/openai.py +10 -0
  4. sglang/srt/configs/model_config.py +11 -2
  5. sglang/srt/constrained/xgrammar_backend.py +6 -0
  6. sglang/srt/layers/attention/__init__.py +0 -1
  7. sglang/srt/layers/attention/flashinfer_backend.py +54 -41
  8. sglang/srt/layers/attention/triton_ops/extend_attention.py +20 -14
  9. sglang/srt/layers/logits_processor.py +30 -2
  10. sglang/srt/layers/moe/fused_moe_triton/fused_moe.py +63 -30
  11. sglang/srt/layers/moe/topk.py +14 -0
  12. sglang/srt/layers/quantization/fp8.py +42 -2
  13. sglang/srt/layers/quantization/fp8_kernel.py +91 -18
  14. sglang/srt/layers/quantization/fp8_utils.py +8 -2
  15. sglang/srt/managers/io_struct.py +29 -8
  16. sglang/srt/managers/schedule_batch.py +22 -15
  17. sglang/srt/managers/schedule_policy.py +1 -1
  18. sglang/srt/managers/scheduler.py +71 -34
  19. sglang/srt/managers/session_controller.py +102 -27
  20. sglang/srt/managers/tokenizer_manager.py +95 -55
  21. sglang/srt/managers/tp_worker.py +7 -0
  22. sglang/srt/managers/tp_worker_overlap_thread.py +5 -0
  23. sglang/srt/model_executor/forward_batch_info.py +42 -3
  24. sglang/srt/model_executor/model_runner.py +4 -6
  25. sglang/srt/model_loader/loader.py +22 -11
  26. sglang/srt/models/gemma2.py +19 -0
  27. sglang/srt/models/llama.py +13 -2
  28. sglang/srt/models/llama_eagle.py +132 -0
  29. sglang/srt/openai_api/adapter.py +79 -2
  30. sglang/srt/openai_api/protocol.py +50 -0
  31. sglang/srt/sampling/sampling_params.py +9 -2
  32. sglang/srt/server.py +45 -39
  33. sglang/srt/server_args.py +17 -30
  34. sglang/srt/speculative/spec_info.py +19 -0
  35. sglang/srt/utils.py +62 -0
  36. sglang/version.py +1 -1
  37. {sglang-0.4.1.dist-info → sglang-0.4.1.post2.dist-info}/METADATA +5 -5
  38. {sglang-0.4.1.dist-info → sglang-0.4.1.post2.dist-info}/RECORD +41 -39
  39. {sglang-0.4.1.dist-info → sglang-0.4.1.post2.dist-info}/LICENSE +0 -0
  40. {sglang-0.4.1.dist-info → sglang-0.4.1.post2.dist-info}/WHEEL +0 -0
  41. {sglang-0.4.1.dist-info → sglang-0.4.1.post2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,19 @@
1
+ from enum import IntEnum, auto
2
+
3
+
4
class SpeculativeAlgorithm(IntEnum):
    """Enumeration of speculative algorithms; ``EAGLE`` is the only member."""

    EAGLE = auto()

    def is_eagle(self):
        """Return True when this member is ``SpeculativeAlgorithm.EAGLE``."""
        return self == SpeculativeAlgorithm.EAGLE

    @staticmethod
    def from_string(name: str):
        """Look up the enum member for an algorithm name.

        Args:
            name: algorithm name such as ``"EAGLE"``. String names are
                matched case-insensitively, so ``"eagle"`` is accepted too.

        Returns:
            The matching ``SpeculativeAlgorithm`` member.

        Raises:
            KeyError: if ``name`` does not correspond to a known algorithm.
        """
        name_map = {
            "EAGLE": SpeculativeAlgorithm.EAGLE,
        }
        # Only strings are normalized, so any other value (e.g. None) still
        # fails with the same KeyError as before.
        if isinstance(name, str):
            name = name.upper()
        return name_map[name]
16
+
17
+
18
class SpecInfo:
    """Empty class: it defines no attributes or methods of its own."""
sglang/srt/utils.py CHANGED
@@ -1273,3 +1273,65 @@ def dataclass_to_string_truncated(data, max_length=2048):
1273
1273
  )
1274
1274
  else:
1275
1275
  return str(data)
1276
+
1277
+
1278
# Marker substrings that signal tool-call markup in a model response.
TOOLS_TAG_LIST = [
    "<|plugin|>",  # internlm2
    "<function=",  # llama3.1
    "<tool_call>",  # qwen2.5
    "<|python_tag|>",  # llama3.2
]
1279
+
1280
+
1281
def parse_tool_response(text, tools, **kwargs):
    """Parse a model response that contains tool-call markup.

    Four markup styles are recognized, checked in this order:
    ``<|plugin|>`` (internlm2), ``<function=`` (llama3.1),
    ``<tool_call>...</tool_call>`` (qwen2.5) and ``<|python_tag|>`` (llama3.2).

    Args:
        text (str): model response in string format.
        tools (List): tools from the user request; every item must expose
            ``.function.name``.
        **kwargs: accepted for call compatibility and ignored.

    Returns:
        tuple: ``(text, call_info_list)``. ``call_info_list`` holds one
        ``(tool_index, name, parameters)`` tuple per parsed call, where
        ``tool_index`` is the position of the first tool in ``tools`` with
        that name and ``parameters`` is a JSON string. The returned ``text``
        is the part before the call for internlm2, the text outside the
        ``<tool_call>`` tags for qwen2.5, and the unmodified response for
        llama3.1 and llama3.2.

    Raises:
        RuntimeError: if none of the known markers is present.
        ValueError: if the markup is malformed (a marker that must be unique
            is missing or repeated, the arguments object is missing, or the
            payload is not valid JSON) or a called tool is not in ``tools``.
        KeyError: if a decoded payload lacks ``"name"`` (or ``"arguments"``
            in the qwen2.5 format).
    """
    if "<|plugin|>" in text:  # internlm2
        text, action = _split_exactly_once(text, "<|action_start|><|plugin|>")
        action = action.split("<|action_end|>")[0]
        # Skip anything between the marker and the start of the JSON object.
        action = json.loads(action[action.find("{") :])
        call_info_list = [_name_and_arguments(action)]
    elif "<function=" in text:  # llama3.1
        action, _ = _split_exactly_once(text, "</function>")
        # Look for the arguments only after the opening tag, so a "{" in the
        # text preceding the call cannot leak into the parameters.
        _, _, call = action.partition("<function=")
        name, opener, arguments = call.partition(">{")
        if not opener:
            raise ValueError(
                f"Malformed <function=...> call in model response: {text}"
            )
        # The arguments are passed through as the raw text the model wrote.
        call_info_list = [(name, "{" + arguments)]
    elif "<tool_call>" in text and "</tool_call>" in text:  # qwen2.5
        # Collect every tool call in the text.
        call_info_list = []
        for payload in re.findall(r"<tool_call>(.*?)</tool_call>", text, re.DOTALL):
            action = json.loads(payload)
            call_info_list.append(
                (action["name"], json.dumps(action["arguments"], ensure_ascii=False))
            )
        # Keep the text outside of the tags: the leading text if there is
        # any, otherwise whatever follows the last closing tag.
        if not text.startswith("<tool_call>"):
            text = text[: text.find("<tool_call>")]
        elif not text.endswith("</tool_call>"):
            text = text[text.rfind("</tool_call>") + len("</tool_call>") :]
        else:
            text = ""
    elif "<|python_tag|>" in text:  # llama3.2
        _, action = _split_exactly_once(text, "<|python_tag|>")
        call_info_list = [_name_and_arguments(json.loads(action))]
    else:
        raise RuntimeError(f"Unexpected model response: {text}")

    # Build the name -> position index once instead of once per call. It is
    # only built when there is something to look up, so ``tools`` is not
    # touched for a response that yielded no calls.
    tool_positions = {}
    if call_info_list:
        for position, tool in enumerate(tools):
            # setdefault keeps the first position for duplicated names.
            tool_positions.setdefault(tool.function.name, position)

    resolved_calls = []
    for name, parameters in call_info_list:
        try:
            position = tool_positions[name]
        except (KeyError, TypeError):
            # TypeError covers an unhashable (non-string) name from the JSON.
            raise ValueError(
                f"Tool {name!r} called by the model is not in the request tools"
            ) from None
        resolved_calls.append((position, name, parameters))
    return text, resolved_calls


def _split_exactly_once(text, marker):
    """Split ``text`` around ``marker``, requiring exactly one occurrence."""
    occurrences = text.count(marker)
    if occurrences != 1:
        raise ValueError(
            f"Expected exactly one {marker!r} in model response, "
            f"found {occurrences}: {text}"
        )
    before, _, after = text.partition(marker)
    return before, after


def _name_and_arguments(action):
    """Return ``(name, arguments_json)`` for a decoded tool-call payload."""
    # The arguments may be stored under "parameters" or "arguments".
    arguments = action.get("parameters", action.get("arguments", {}))
    return action["name"], json.dumps(arguments, ensure_ascii=False)
sglang/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.4.1"
1
+ __version__ = "0.4.1.post2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sglang
3
- Version: 0.4.1
3
+ Version: 0.4.1.post2
4
4
  Summary: SGLang is yet another fast serving framework for large language models and vision language models.
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -243,7 +243,7 @@ Requires-Dist: torch; extra == "srt"
243
243
  Requires-Dist: vllm<=0.6.4.post1,>=0.6.3.post1; extra == "srt"
244
244
  Requires-Dist: cuda-python; extra == "srt"
245
245
  Requires-Dist: flashinfer==0.1.6; extra == "srt"
246
- Requires-Dist: sgl-kernel>=0.0.2.post8; extra == "srt"
246
+ Requires-Dist: sgl-kernel>=0.0.2.post10; extra == "srt"
247
247
  Provides-Extra: srt-hip
248
248
  Requires-Dist: sglang[runtime_common]; extra == "srt-hip"
249
249
  Requires-Dist: torch; extra == "srt-hip"
@@ -351,15 +351,15 @@ The core features include:
351
351
  - [Backend: SGLang Runtime (SRT)](https://sgl-project.github.io/backend/backend.html)
352
352
  - [Frontend: Structured Generation Language (SGLang)](https://sgl-project.github.io/frontend/frontend.html)
353
353
 
354
- ## Benchmark And Performance
354
+ ## Benchmark and Performance
355
355
  Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/), [v0.3 blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/), [v0.4 blog](https://lmsys.org/blog/2024-12-04-sglang-v0-4/)
356
356
 
357
357
  ## Roadmap
358
358
  [Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)
359
359
 
360
360
  ## Adoption and Sponsorship
361
- The project is supported by (alphabetically): AMD, Baseten, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, Meituan, NVIDIA, RunPod, Stanford, UC Berkeley, xAI, 01.AI and DataCrunch.
361
+ The project is supported by (alphabetically): AMD, Baseten, DataCrunch, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, LMSYS.org, Meituan, NVIDIA, RunPod, Stanford, UC Berkeley, UCLA, xAI, 01.AI.
362
362
 
363
363
  ## Acknowledgment and Citation
364
364
  We learned from the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
365
- Please cite our paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
365
+ Please cite the paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
@@ -1,17 +1,17 @@
1
1
  sglang/__init__.py,sha256=b2oIdWzp5P8SzieeOs2TzJoN3Do3tfJbV8gZS_imVcs,1619
2
2
  sglang/api.py,sha256=NdO6cYnklnEBQBKqQjlqI8-P1EownKQ71t5ibCGhEVo,6953
3
3
  sglang/bench_latency.py,sha256=oZjSAzX7dUiSu-zdz0dkyUPo-qAX_lsXFH1gf03akgI,76
4
- sglang/bench_offline_throughput.py,sha256=iQiJCK3KQDCdwU1NVbIwbtthssWzBXiIsKUDA7Z_hO0,12510
4
+ sglang/bench_offline_throughput.py,sha256=r-uBvpnx-30mAnVwQB4WlqiXxy2fn5a1NUARwZcaIo4,12533
5
5
  sglang/bench_one_batch.py,sha256=jkyMhK0lqn5dRCYgAh30qZrNHP4gAbXODymBMNXK86I,15859
6
6
  sglang/bench_one_batch_server.py,sha256=-fV9FTLNNcSIy0pgYeggXedPVK0fVsXZqVQswT8OMOY,5945
7
- sglang/bench_serving.py,sha256=3VQatM51v9f55aUQQ5crYMxxKHr1AbThicsWfBy_tjU,53190
7
+ sglang/bench_serving.py,sha256=YQiCZreejCPBTqMmZsCB99RMi1N-Jx-dZtaafcQ8-14,53377
8
8
  sglang/check_env.py,sha256=4OqpZaEJOfBM6-vtPILto5kqDmgiZM1Koc7lK78A7CI,8427
9
9
  sglang/global_config.py,sha256=fnT0U9vlHdGaQFKN9tYTnUF4-eVW4HYQURd5zvPtrg0,1286
10
10
  sglang/launch_server.py,sha256=4y2QeSj0wVNB9MJQZeahD4ahTDU6gwqo7MPUytyFop0,403
11
11
  sglang/launch_server_llavavid.py,sha256=tGc17S1vUfLwbi1GB26oOdXxTWr7gjlqpTrPnrMRNO8,1007
12
12
  sglang/llama3_eval.py,sha256=gWSboDchIGybIce88bJlrCG0yiLZ513mw4gcutJlzGM,10017
13
13
  sglang/utils.py,sha256=23jf4Mz8E5p5a6JOkjnfYZixdjZUk88F_mZ8rZcby5Q,11597
14
- sglang/version.py,sha256=pMtTmSUht-XtbR_7Doz6bsQqopJJd8rZ8I8zy2HwwoA,22
14
+ sglang/version.py,sha256=1g4t88smWYt1DD5SJZdzXI13OPbsQPOEMX9twA4wq6A,28
15
15
  sglang/lang/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  sglang/lang/chat_template.py,sha256=cnfjjxIIcYRGRxXlJlOGnpFxFuhMHut7DS52LsOMKcA,15826
17
17
  sglang/lang/choices.py,sha256=-W1DVw9N9ZliVpvmWrzIXG4cswAah8eMQrHWzkS3D8o,6234
@@ -23,7 +23,7 @@ sglang/lang/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
23
23
  sglang/lang/backend/anthropic.py,sha256=EXRX7xJgA5KZszX7toSLVnKzFQ5EO0Loj-YjHFtxSxg,2081
24
24
  sglang/lang/backend/base_backend.py,sha256=tdoh9YF3CyekY1BKiX9n7-aA4srDWIuA4RDJLM7q8qg,1985
25
25
  sglang/lang/backend/litellm.py,sha256=ugmL7sfUxkUHVbHtwNzHgdQAEd4UCjNQboFuE3KThcY,2450
26
- sglang/lang/backend/openai.py,sha256=qM7eVH_kMxnDd2rpxOH0v76KxtOJFlAwgLgWIKvFGCI,15060
26
+ sglang/lang/backend/openai.py,sha256=ha9a2P6T80TmSgYlyIwB1qYawWkjcOgiOptkktkqa1U,15436
27
27
  sglang/lang/backend/runtime_endpoint.py,sha256=dfs-yZ1ekKmnbpZLluQHWPmMeZJKbaaZRRGYRa9eBE8,10541
28
28
  sglang/lang/backend/vertexai.py,sha256=O-iBLD-y3vq80UxnrAoJri7bxpgd-_eakZ88Cf8bEGA,4855
29
29
  sglang/srt/_custom_ops.py,sha256=Y4gyTDGhWz-W2Igq25Ojm8XFiyvkawW9I-79iwYvxJ0,3574
@@ -32,20 +32,20 @@ sglang/srt/conversation.py,sha256=u9zFU8aMYzwHUbQRKU76B_T-jfLlPoxUcWG_nRbDM2I,21
32
32
  sglang/srt/hf_transformers_utils.py,sha256=38Ms0H2-VMerOS6jnczcFtZMS6lhw9B5rSWKAfxVUfQ,7945
33
33
  sglang/srt/mm_utils.py,sha256=1ScBunw_x4W8ebM_AcJ62-1T2mfT8NlMJqdAhkF1lb0,12367
34
34
  sglang/srt/model_parallel.py,sha256=eLXZhvJ4wG6dh0FontNCIdVZvHYdWgaeY-5cu7TD9tE,6078
35
- sglang/srt/server.py,sha256=E9YKKXpXv3vPvRy0-cgcy0-5UA-OZz42-32EZWKTicA,34661
36
- sglang/srt/server_args.py,sha256=LgnQ-kBJZ3E7hMMZj9bSK0mn7Bhjk1nJHxLcxl-lGTM,34572
37
- sglang/srt/utils.py,sha256=J8kFl6kDBwFZCM6AKaVTiqdhJKRg0JOH0pNrD1ZeWmM,41726
35
+ sglang/srt/server.py,sha256=sDERAZlRa6OTaUk-SfW5aKJbPui1COpPG34HDlMHMNc,34916
36
+ sglang/srt/server_args.py,sha256=lBMOME7OSqG0Opinsin-QsioNrWpp2M0ZosB8cXoRrY,33917
37
+ sglang/srt/utils.py,sha256=i8MjcaSQjPPfPZ0txufTtqLr4Q7YhHQ86L1i9j-y5yY,44131
38
38
  sglang/srt/configs/__init__.py,sha256=_usVIXHQjft4PAJ1Y-yGQOn2QNOv501GYMlQwpGXbns,208
39
39
  sglang/srt/configs/device_config.py,sha256=dResqHjkg_dq10v6rnVpbXpvABZRB0jylOm-2_JAnx0,428
40
40
  sglang/srt/configs/exaone.py,sha256=Duxd4yQoKy8GWEzZD_kCY_OzmN_67CTJL_Kgn0eXk3g,10731
41
41
  sglang/srt/configs/load_config.py,sha256=TcPi_HY6xu5SiVZsxPOoB5pGeDUNebOk7muoUH9VBDg,3083
42
- sglang/srt/configs/model_config.py,sha256=vVarlLTw9Ged1PXIwRP-R8UhiG6oaezNIZhTNuF0eQc,16070
42
+ sglang/srt/configs/model_config.py,sha256=QP_6WaWMrE4NNF-XODRomiQPO0FABmVZIj5A-qJfnYg,16427
43
43
  sglang/srt/configs/qwen2vl.py,sha256=ZjLy9v2eZY4wptUfY3CWgYKg2B5DDrkfCSyTy_Zf_bg,4351
44
44
  sglang/srt/constrained/__init__.py,sha256=UWZNVLvOT5ZBX8M36sONgDmnKtkQ0cSfhQD2jO0ATuk,786
45
45
  sglang/srt/constrained/base_grammar_backend.py,sha256=FhVm7PxhXDl0joV9NP5RjKgz7dR1dZvUAQnh0mdtvVY,2353
46
46
  sglang/srt/constrained/outlines_backend.py,sha256=CipNHNNXs8xtnJNVNe6FCwZUlSbIXbGmWVlZz3hUpFQ,6820
47
47
  sglang/srt/constrained/outlines_jump_forward.py,sha256=iZWXeR3gNYoMubLGyFmLPO4V2YsN5DiGjD71Xk9iFaE,6418
48
- sglang/srt/constrained/xgrammar_backend.py,sha256=4It9_GqU4UZFhxIw_7hkzpXaMPUtksk6Xfe0Agsfw7A,4620
48
+ sglang/srt/constrained/xgrammar_backend.py,sha256=76oUFXeB29bfnEVWa1-rIrwQm5jhuMlzAX10HtAq1fQ,4887
49
49
  sglang/srt/distributed/__init__.py,sha256=__tl9Frrf3PFrSyNYcn5i-y2rL-J4-Qn6RJwrsZ4xgc,83
50
50
  sglang/srt/distributed/communication_op.py,sha256=ZoIhboZyefiAwr-1K-wF3rAFSQ4Wt-RxXpsX443Gbt4,1157
51
51
  sglang/srt/distributed/parallel_state.py,sha256=HplRH5S0AWdwSdhoHYX9_UWQZlFjh2Z1LHaz68EXlpE,47555
@@ -63,49 +63,49 @@ sglang/srt/layers/activation.py,sha256=EboMjT9HV2tNHQ6rzpojtlkzev1lAFbhQlxMg9hwx
63
63
  sglang/srt/layers/custom_op_util.py,sha256=0vu-yX2wwonmO1L_o5G7SA6C-8XuhDIh9rPDvNeLhoc,922
64
64
  sglang/srt/layers/layernorm.py,sha256=nRQ1w1xSUcU-zlqVC61BnGG6otS5W1w9VaSzeXizrx4,4037
65
65
  sglang/srt/layers/linear.py,sha256=KyRFU0VcoNuN-hnQB9QQcBN9NCpeqPtLzzufIHUpV6w,47064
66
- sglang/srt/layers/logits_processor.py,sha256=JlOU0x8vBGIuTwHSdjR6Kly9_uzilBMv0NE_rvUx0W4,14747
66
+ sglang/srt/layers/logits_processor.py,sha256=Imh-qY1D9J80DZVSVV0LfTiHMEw6oQ3JbY9lXxPZAXE,15656
67
67
  sglang/srt/layers/pooler.py,sha256=rj2lygvleBnyLCBZ8I11HGMgpfIDsT0l3PIkshJwdu4,1606
68
68
  sglang/srt/layers/radix_attention.py,sha256=E4cmvkcCdCtb6VyLNrCKy1D6VwHQ063oH3JQXPaRy6w,2178
69
69
  sglang/srt/layers/rotary_embedding.py,sha256=29tx3JNR40AoXqBa2cFGBjva9vU2xgFipETlpMaaZas,3985
70
70
  sglang/srt/layers/sampler.py,sha256=k4Op_HMkQfT7t9wgQwBVotfTUXEocrzRyQqEFnff1pc,5511
71
71
  sglang/srt/layers/torchao_utils.py,sha256=dQVuWNXxAvOPjr2G5BBMWqC2oKcS2B52rx-fEc_elmc,3545
72
72
  sglang/srt/layers/vocab_parallel_embedding.py,sha256=slGwLiWjuFLCUdRe-GTlfumyZpqVX9VF6No_UGOT-hA,21624
73
- sglang/srt/layers/attention/__init__.py,sha256=KIJhzOJWYioQE7Va4D83-V-ZUZVMZcczuNgDC3dlSRo,2583
73
+ sglang/srt/layers/attention/__init__.py,sha256=lNLfWqePc5NMej-AcXl97vxVXsxQOgP7dNNb2ibyUWI,2562
74
74
  sglang/srt/layers/attention/double_sparsity_backend.py,sha256=RQdEKRykSLf9ilnaHmR6T7RFqh4emH_adfB3aJN2BUU,10920
75
- sglang/srt/layers/attention/flashinfer_backend.py,sha256=umD1E2zvMnPbbgvx2Ex5LQB6a4a41brjsks1M0gFMMU,26357
75
+ sglang/srt/layers/attention/flashinfer_backend.py,sha256=8nH4EIEXvNk9yZVl7mSn78w5Dli5UiWL-ZCeYykG9HI,27280
76
76
  sglang/srt/layers/attention/torch_native_backend.py,sha256=nQdeqWEMMH_wrod5wssDCJG-uPKm0uslvkALKqPRPQ8,10509
77
77
  sglang/srt/layers/attention/triton_backend.py,sha256=-TobyZHwlbJ5HhbFg-jgCqVOw4Y-opgEuFo-EusASQc,6264
78
78
  sglang/srt/layers/attention/triton_ops/decode_attention.py,sha256=oJ_UK1t229zF3hbTDiQe7t-X-IbM2dOxx4U2ch-vmjA,17847
79
79
  sglang/srt/layers/attention/triton_ops/double_sparsity_attention.py,sha256=1pSXfY3EEaM7iRN_uElHnAfsrJMhTFbu9fj8Z0O2PbE,21480
80
- sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=tZJhzqcf1KKMT8z7_32eVk_D1NHP71c-S3UNxemfAHM,11542
80
+ sglang/srt/layers/attention/triton_ops/extend_attention.py,sha256=DWOZXSTVN5ZbcFjDjcqs-nPdUkxSwum0SVXhVKqwh2g,11688
81
81
  sglang/srt/layers/attention/triton_ops/prefill_attention.py,sha256=lojFXRZMLWkzS2Y8uxaolnQhXaWKG19mCAWaF5KQeiI,6087
82
82
  sglang/srt/layers/moe/fused_moe_native.py,sha256=8q-LFZMSCGLc2_Gltp2lH0gSb4A1WOuKQW3wo3rpj5g,1601
83
- sglang/srt/layers/moe/topk.py,sha256=YjIiFqMERvkChkwZUqTrL_xaQyzsYsZzVUe4PzAhRZI,6299
83
+ sglang/srt/layers/moe/topk.py,sha256=JpeIl_-CNk0yyG3k5fmmNbbmR2_9bkKC23UoLOlMkjw,6954
84
84
  sglang/srt/layers/moe/ep_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
85
  sglang/srt/layers/moe/ep_moe/kernels.py,sha256=wb_S2qLxoWWgQu9coXy0XLNGvHzdZSdwXr0PGy4QySg,10940
86
86
  sglang/srt/layers/moe/ep_moe/layer.py,sha256=6iQU5ZjQ8IXGoQ8ZlBuJqyQxYTEem9vXI6rbVIWKlZw,22303
87
87
  sglang/srt/layers/moe/fused_moe_triton/__init__.py,sha256=h9yMFAL_bagUf-qBED8gSWdCOb7d8IdA-pE-L_nIg8E,842
88
- sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=GVugCKapd3CvgkvPQ_FmQplC12-grv3n1FRkLJc6WhY,30790
88
+ sglang/srt/layers/moe/fused_moe_triton/fused_moe.py,sha256=KvOy544x_4nRqg50o5YHQpHvF8TUD7q9LXDAWPGJlAA,31796
89
89
  sglang/srt/layers/moe/fused_moe_triton/layer.py,sha256=BclDj5JyCy-8Bfue4broL1-IG6a4dUyggE9WQLa06sg,20575
90
90
  sglang/srt/layers/quantization/__init__.py,sha256=VPYXShHvbvkOgVBlkIqic4RhdJ1y6EZ3r34T-nZMT1k,4606
91
91
  sglang/srt/layers/quantization/base_config.py,sha256=daK9p0aijMszLUm1W4Pc33FK87MdqYK1NoWFKif-j80,4599
92
- sglang/srt/layers/quantization/fp8.py,sha256=wNnpXLroIl7D98mlfCiXZPE9hrP5ricHrXY1WZBzEEo,30810
93
- sglang/srt/layers/quantization/fp8_kernel.py,sha256=v4-7hCQFyuUSZmeJS_5VDCu6a1-EGWXQ088FdPTjO_0,8137
94
- sglang/srt/layers/quantization/fp8_utils.py,sha256=HBJBaNcln1NrLxzw0ppUjMd6w-ryuGDDHCYJq7mRQac,4035
92
+ sglang/srt/layers/quantization/fp8.py,sha256=k4mw-iKxlaEWRkGgaoxCLzZ_dYydyRj0y1N1B_umMwU,32668
93
+ sglang/srt/layers/quantization/fp8_kernel.py,sha256=cYF4ckqrUyhCO9Ha7zi05R8EhRaqSa8rFpYisz-9Ed0,10743
94
+ sglang/srt/layers/quantization/fp8_utils.py,sha256=qBVJXxbxqmf8-Juq0t-IXWjlaZoePJqFNYcs9-oT5Yo,4150
95
95
  sglang/srt/lora/lora.py,sha256=-o2mBmUvoVpdkgdAkWTARN4kfyep3UNEJLcg6moh0SU,15056
96
96
  sglang/srt/lora/lora_config.py,sha256=a2fTQESlCbG1xLiBYy4ptZ6c0Burcqyg1_6V1XSok-Y,1506
97
97
  sglang/srt/lora/lora_manager.py,sha256=DHiqdl0_4wQ5PxZBZtlCpP14515mDV2_H9tzL3Rdss8,12886
98
98
  sglang/srt/managers/data_parallel_controller.py,sha256=psI4FAuBGjtdnEuwagnGdtRqvqSSxOROfNKQqVDqlVA,8382
99
99
  sglang/srt/managers/detokenizer_manager.py,sha256=nZkbwt4yty_oy8rvg4T7PbgyVLoBLohvHl25xlQpBoo,8439
100
100
  sglang/srt/managers/image_processor.py,sha256=Y8RgyrzbJjJTpjbnZDa5qiiG5wWjZ68rOXUPDi6kkFo,13698
101
- sglang/srt/managers/io_struct.py,sha256=_LWWqT3LNwZGaWhg2d3kTg1V2MTHKzRasCvxF9Nfpi4,15429
102
- sglang/srt/managers/schedule_batch.py,sha256=qryPWCdOTFzxomDa80U-5guShOb1K4kBUWcPCCchYB8,45762
103
- sglang/srt/managers/schedule_policy.py,sha256=cLNi__smbg02keWgUMfB_nEM3vllocPB0XyG1P5qO7I,15469
104
- sglang/srt/managers/scheduler.py,sha256=3Olw4Yf4Qtn1i4PqK3PT9hkXYGE8nemL2_Xjn8JLxAQ,61819
105
- sglang/srt/managers/session_controller.py,sha256=Yp-IV3rXczACZxZXmF-QxW9CWICGy8KHQ9ttBGJ8WXA,2800
106
- sglang/srt/managers/tokenizer_manager.py,sha256=Vta7Lysvh4rPWqEB00shqAzpGUfv7GdPETDqFCU8RxA,31556
107
- sglang/srt/managers/tp_worker.py,sha256=X1EwFX3FSsmXx7jeeX2tjZRocaujabQYWm-M-0CFEBE,7363
108
- sglang/srt/managers/tp_worker_overlap_thread.py,sha256=-QNBJRKxraa9Xt2WI1AFzZYdneIJ1eXv0GjFzDqXoE0,8926
101
+ sglang/srt/managers/io_struct.py,sha256=Gru7LEyc3tcM_LewoteCb7GXIrh-OYhA2CnEvjc1Cis,15769
102
+ sglang/srt/managers/schedule_batch.py,sha256=KnoVuWgINnyard-BOXCo0jm3IMdXN9wIwnftMKcag-s,46097
103
+ sglang/srt/managers/schedule_policy.py,sha256=QxjQ8-le062AMHHxool6CxkhvB4FIwhOQPzTX_JwL6U,15447
104
+ sglang/srt/managers/scheduler.py,sha256=Z1_wf6OCC8Hevc7y0D4Rt4EW5Et9bgTL9oJOkmNmDjo,63490
105
+ sglang/srt/managers/session_controller.py,sha256=3laMRIXEYWDjfytCjPs0vw_Tw__k-nKBY-bYzycYbfc,5482
106
+ sglang/srt/managers/tokenizer_manager.py,sha256=SXvVZHFMBCtcLkKnq-O3uzwrEhfVqk6Y1fzeBEFNq0E,33010
107
+ sglang/srt/managers/tp_worker.py,sha256=8RVBLQaS3TnX7Z4J35RVrFN0M6PVnRBhct3sczBL4dY,7644
108
+ sglang/srt/managers/tp_worker_overlap_thread.py,sha256=JQfrVPeE56ZGJ3nozkhZR-RSb2oePsY7iuedM7XCtdQ,9157
109
109
  sglang/srt/mem_cache/base_prefix_cache.py,sha256=QC8HS8RC5DXu14kyXsxAgEUsn0f932p2DjqzbKjc6Bs,962
110
110
  sglang/srt/mem_cache/chunk_cache.py,sha256=R2gHAuqKd5ayQW3NnsgoGUH31---Z5izCDyCqLL0FjQ,2524
111
111
  sglang/srt/mem_cache/flush_cache.py,sha256=GYcxmNXh4hsMpFfNOuCTpKilW7guZwTtAg_usVeM3J0,979
@@ -114,10 +114,10 @@ sglang/srt/mem_cache/radix_cache.py,sha256=c5voySV5L855c0G9cBEc9iQ4nR7PDDmg0V6fW
114
114
  sglang/srt/metrics/collector.py,sha256=ZWoFx_FKN0sNMSZ8RJWUVQ0RFEYhIHxdw0d4TZTluMU,6861
115
115
  sglang/srt/metrics/func_timer.py,sha256=VFyNRrbnKVCwnQsrlLin1lITJfjQpf9m8sGPqL5LIsQ,3438
116
116
  sglang/srt/model_executor/cuda_graph_runner.py,sha256=1n5WxoE9-0B3unwkkcR355K_D290h2LGt_7EvH02DQM,16246
117
- sglang/srt/model_executor/forward_batch_info.py,sha256=L5mVoW5SaO6To-7nGk0TZM-FFB5_78cARpJ-aC2rwD0,12883
118
- sglang/srt/model_executor/model_runner.py,sha256=Bm3NWTS3xmOGXEJnucnJZQldpVOzu-DCEUfaJy_PTU0,30104
117
+ sglang/srt/model_executor/forward_batch_info.py,sha256=vqF8XrHQPk3ZL7HqPvvkfP53oqBx0Fajb5lAIkdifBo,13961
118
+ sglang/srt/model_executor/model_runner.py,sha256=TjvAwwr7EqZdmE-5HbuQMeEa0e0FqY6LeqqzEAHXMPU,30012
119
119
  sglang/srt/model_loader/__init__.py,sha256=zGZkOBz1zx-pkaIy47BasL3fjDlAcxAXUTjInOhXHAE,919
120
- sglang/srt/model_loader/loader.py,sha256=VBrY4W9CiVvS_D8yXhdkW9jReV9rSMSkJplabz0Fxgk,43528
120
+ sglang/srt/model_loader/loader.py,sha256=7OG_8-66vFDFZ9kVKGNK1BFBjZ6ql449dlyvdCbMqvE,43876
121
121
  sglang/srt/model_loader/utils.py,sha256=0NaMR67fESFopaklmsleiL27XH1QUrjZW246MUu1EJ0,1369
122
122
  sglang/srt/model_loader/weight_utils.py,sha256=kQo9KPThjH3HAOCfC_tdwdrshdWuWJOVpPR0skSyaRY,24193
123
123
  sglang/srt/models/baichuan.py,sha256=PzBOFcEAixakPEkQSaJwC0Xc1fu-yCsN9T0I67r8QmY,14919
@@ -128,7 +128,7 @@ sglang/srt/models/deepseek.py,sha256=_cVOvR6eSEgRf6TUBpTD5uMdijDWFw4sSt4lGzl8tbg
128
128
  sglang/srt/models/deepseek_v2.py,sha256=-v_OJr2c3gJ0NMxQjvT3Jknz1XPGkzKx0TVR3NIiC6A,37284
129
129
  sglang/srt/models/exaone.py,sha256=dkERTZVxrRroqu5AGLP7D4N6n8HvDqlNaDQUIe15mZY,13038
130
130
  sglang/srt/models/gemma.py,sha256=ydRqsG-7004r1fAiz01LHUmcj_6XN0Tn4xO1keJnMQk,12126
131
- sglang/srt/models/gemma2.py,sha256=41PlW8pMb4rMETdAni_JWDhZeIn_QsTQireAyUjsURA,15848
131
+ sglang/srt/models/gemma2.py,sha256=-bFN-Te3YWAunLCrF-XFk_6fJS7gHM4Ca6h6aesXUTM,16362
132
132
  sglang/srt/models/gemma2_reward.py,sha256=nJ01KfqLSJtqMLm3sG8p2mGZFK1xhhjh7I7Ccb-_Hq8,2494
133
133
  sglang/srt/models/gpt2.py,sha256=2je1kE09sGcaORWnJuGYAkcwwOrT9EK-KhQaoCKjCSA,9517
134
134
  sglang/srt/models/gpt_bigcode.py,sha256=tovyOdJu2x3LkzmkdFXX_iJdkxuyChIDxwgvPBy6UPo,9528
@@ -136,8 +136,9 @@ sglang/srt/models/granite.py,sha256=AeQY9Dxd1ZnwgCYBK0vSXXiMGM-yt9iaOVf_ruOUHXw,
136
136
  sglang/srt/models/grok.py,sha256=J9lgNbFebvXgF19nfZyHwlGPlGWY_m0LgP506YvOYrU,15668
137
137
  sglang/srt/models/internlm2.py,sha256=_xcKtd6YtEFUTozaN-yUb0xbSYckRpomfPSKcAk4j-Y,12127
138
138
  sglang/srt/models/internlm2_reward.py,sha256=8K26A9oIFFGx_9U2mF87j7FX8K87HGKMnVL3ht1Uc7I,2398
139
- sglang/srt/models/llama.py,sha256=S7nS05hhFGghXu0v-w9RZyBTY6OCEVF5Aaw4GX_E_9g,19929
139
+ sglang/srt/models/llama.py,sha256=4UPKF7erp7qqBD11uvvQkO1Fo_wDs71BmA8Y2csXRcA,20302
140
140
  sglang/srt/models/llama_classification.py,sha256=DwboM1xHXdf3Fddf7xGnrfdOLJwXdiJs994cIpAPa2g,2984
141
+ sglang/srt/models/llama_eagle.py,sha256=88DzR54DKBIKJ1h-bkIa8mc1qJnlkdZ1eGYY3c5mpBY,4442
141
142
  sglang/srt/models/llama_embedding.py,sha256=rh-AiczPY_pTpzcACHvSMVjh1hsV_MZBBwP0LQxPsGM,3130
142
143
  sglang/srt/models/llama_reward.py,sha256=oPxh5E2UkxLULNdR68dFvt2I7j33CJFN6nyA-8L2_cg,4516
143
144
  sglang/srt/models/llava.py,sha256=xrkg8sht8tBOID7427IEZtHL-KKWfEivDe2NqGjTSAs,26373
@@ -162,16 +163,17 @@ sglang/srt/models/torch_native_llama.py,sha256=YeXHorFm6QfnczLXwPb5TG9a-He0uiA9R
162
163
  sglang/srt/models/xverse.py,sha256=Oq--KqvbYu2H4TMVGEHpSnJLEwXBpxlncR9ilsQeckc,13579
163
164
  sglang/srt/models/xverse_moe.py,sha256=7E60YIST4ELYwLRgjtHiLRI5Uyc7XqQTM7jQXiWaQs4,15541
164
165
  sglang/srt/models/yivl.py,sha256=88OubtuZ38Dxb2LzfV_MTPBI4wKhh4NJqFu--efbhFM,4809
165
- sglang/srt/openai_api/adapter.py,sha256=DbLA4-v-QrKJHYDH4fpDSXqmyz_vpcFE-1tnhh60m6o,54057
166
- sglang/srt/openai_api/protocol.py,sha256=ecRNNqkhwwKZaIoJlPhtp2VTcHxBJDbNN8lrKS7uBx8,10406
166
+ sglang/srt/openai_api/adapter.py,sha256=HvgeFPWv-v8LOiYF2iNCo-14BIZLAPznNTCUbubB2Rg,57091
167
+ sglang/srt/openai_api/protocol.py,sha256=anWGr2Br8gVYm6Z0yvDwjXLaPCPuvJZ28gr5rV2dhVQ,11613
167
168
  sglang/srt/sampling/sampling_batch_info.py,sha256=s--zNjk-LErZ5lMqnZ7KiuJltaziKRbQAU5qYpKIxAc,8564
168
- sglang/srt/sampling/sampling_params.py,sha256=n7RbBg_bS5fYhsiWa8uJYnfoXy_i5DvtTBOkuFnHDNU,5286
169
+ sglang/srt/sampling/sampling_params.py,sha256=BkgCJAOSmQXwJrNXg26zSjKfMy0d5mMN6oHRk_ZuESI,5499
169
170
  sglang/srt/sampling/penaltylib/__init__.py,sha256=5vQw0Y5DSzmsoFg1IdMIKLwFVhYZ5ArADHVBYbSmOec,513
170
171
  sglang/srt/sampling/penaltylib/orchestrator.py,sha256=J-DEemZcKm1--o37kf3qDOE8SZ_6H3d5oex49Mgq2ZU,10762
171
172
  sglang/srt/sampling/penaltylib/penalizers/frequency_penalty.py,sha256=1Zp2aL6dD60mwD1tCcSG0x5IYo0v4z9ce-q_YwbJ9f8,2490
172
173
  sglang/srt/sampling/penaltylib/penalizers/min_new_tokens.py,sha256=_Nxv0XgUPirZjw2SEJYp_Cd9ZcLwmt7h6JE6J4hhFq4,3629
173
174
  sglang/srt/sampling/penaltylib/penalizers/presence_penalty.py,sha256=5tOgCg7OvE9kSN9VMCpH1hwqo1YMxt9iS5PVpct9HpU,2468
174
175
  sglang/srt/sampling/penaltylib/penalizers/repetition_penalty.py,sha256=m22Rfn1RuB1HpImBDECsiJ2VooBYpsFADAwnk1EPzk0,2751
176
+ sglang/srt/speculative/spec_info.py,sha256=d-82uWEC-QBqAgv3XGDNDW8DlHv4MtUsZghFqzGwV7U,352
175
177
  sglang/test/few_shot_gsm8k.py,sha256=7yDbEQe49gZeJhz2wFFX-gf_59ThDKsCS1xwfogNc7k,4034
176
178
  sglang/test/few_shot_gsm8k_engine.py,sha256=QQbrwOX6-cJDD3RZC_e7zPnt6aSo8JdF8X_lRHSjdDM,3886
177
179
  sglang/test/run_eval.py,sha256=9yO0hXZOcn4abEOs96T-XPguDEklK16Ltco0pGF3zCg,4020
@@ -188,8 +190,8 @@ sglang/test/test_layernorm.py,sha256=IacByD5d-stXjzBz8Ypamc7povlcedpKPbb_4JLgo3c
188
190
  sglang/test/test_programs.py,sha256=1Z0umrsUu9pagzyGH5SrXl_qhKSyTfUv_kWC2mcn0qo,18208
189
191
  sglang/test/test_utils.py,sha256=HJG7kUQOk6n9FBbH89PDtQ41C3kt1cfJODhAEcFT0AQ,23823
190
192
  sglang/test/srt/sampling/penaltylib/utils.py,sha256=CjxHgywh0hx_87iynzQt_ztHu6zBVuE-YrZ-XPmW6U4,12906
191
- sglang-0.4.1.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
192
- sglang-0.4.1.dist-info/METADATA,sha256=RlVEQtwr_CCGTs83vNPwWXQukutbFfBz9xBPlXSl6qc,22523
193
- sglang-0.4.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
194
- sglang-0.4.1.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
195
- sglang-0.4.1.dist-info/RECORD,,
193
+ sglang-0.4.1.post2.dist-info/LICENSE,sha256=FJXh51fvTQklojUFY89XVLsjxRcBqOxPs8XNy-2uZ0c,11346
194
+ sglang-0.4.1.post2.dist-info/METADATA,sha256=eORQMKMQDt_eTOh6PxMzIvyjNbg2FllxXss2Z9jU3Ug,22544
195
+ sglang-0.4.1.post2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
196
+ sglang-0.4.1.post2.dist-info/top_level.txt,sha256=yxhh3pYQkcnA7v3Bg889C2jZhvtJdEincysO7PEB09M,7
197
+ sglang-0.4.1.post2.dist-info/RECORD,,