liger_kernel-0.5.4-py3-none-any.whl → liger_kernel-0.5.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- liger_kernel/chunked_loss/cpo_loss.py +51 -11
- liger_kernel/chunked_loss/dpo_loss.py +30 -4
- liger_kernel/chunked_loss/functional.py +2 -0
- liger_kernel/chunked_loss/fused_linear_distillation.py +20 -5
- liger_kernel/chunked_loss/fused_linear_ppo.py +331 -0
- liger_kernel/chunked_loss/fused_linear_preference.py +2 -2
- liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +112 -17
- liger_kernel/chunked_loss/grpo_loss.py +137 -61
- liger_kernel/chunked_loss/jsd_loss.py +43 -13
- liger_kernel/chunked_loss/kto_loss.py +50 -12
- liger_kernel/chunked_loss/orpo_loss.py +37 -5
- liger_kernel/chunked_loss/simpo_loss.py +47 -11
- liger_kernel/ops/cross_entropy.py +7 -2
- liger_kernel/ops/dyt.py +225 -0
- liger_kernel/ops/fused_linear_jsd.py +2 -1
- liger_kernel/ops/jsd.py +30 -11
- liger_kernel/ops/kl_div.py +2 -2
- liger_kernel/transformers/__init__.py +4 -0
- liger_kernel/transformers/dyt.py +20 -0
- liger_kernel/transformers/functional.py +5 -0
- liger_kernel/transformers/model/gemma.py +8 -16
- liger_kernel/transformers/model/gemma2.py +7 -16
- liger_kernel/transformers/model/llama.py +8 -15
- liger_kernel/transformers/model/llava.py +369 -0
- liger_kernel/transformers/model/loss_utils.py +57 -0
- liger_kernel/transformers/model/mistral.py +9 -10
- liger_kernel/transformers/model/mixtral.py +8 -15
- liger_kernel/transformers/model/mllama.py +8 -15
- liger_kernel/transformers/model/olmo2.py +8 -16
- liger_kernel/transformers/model/paligemma.py +397 -0
- liger_kernel/transformers/model/phi3.py +8 -15
- liger_kernel/transformers/model/qwen2.py +8 -15
- liger_kernel/transformers/model/qwen2_5_vl.py +204 -0
- liger_kernel/transformers/model/qwen2_vl.py +9 -10
- liger_kernel/transformers/monkey_patch.py +286 -12
- liger_kernel/utils.py +1 -3
- {liger_kernel-0.5.4.dist-info → liger_kernel-0.5.6.dist-info}/METADATA +11 -7
- liger_kernel-0.5.6.dist-info/RECORD +80 -0
- {liger_kernel-0.5.4.dist-info → liger_kernel-0.5.6.dist-info}/WHEEL +1 -1
- liger_kernel/chunked_loss/fused_linear_rlhf.py +0 -213
- liger_kernel-0.5.4.dist-info/RECORD +0 -74
- {liger_kernel-0.5.4.dist-info → liger_kernel-0.5.6.dist-info/licenses}/LICENSE +0 -0
- {liger_kernel-0.5.4.dist-info → liger_kernel-0.5.6.dist-info/licenses}/NOTICE +0 -0
- {liger_kernel-0.5.4.dist-info → liger_kernel-0.5.6.dist-info}/top_level.txt +0 -0
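Two of the additions above, `liger_kernel/ops/dyt.py` and `liger_kernel/transformers/dyt.py`, introduce a DyT (Dynamic Tanh) kernel, an elementwise tanh-based stand-in for normalization layers. As a rough sketch of the operation these files implement (plain PyTorch reference math, not the library's fused Triton kernel; the class and parameter names below are illustrative, not the library API):

```python
import torch
import torch.nn as nn

class DyTSketch(nn.Module):
    """Illustrative DyT: y = gamma * tanh(alpha * x) + beta."""

    def __init__(self, hidden_size: int, init_alpha: float = 0.5):
        super().__init__()
        self.alpha = nn.Parameter(torch.full((1,), init_alpha))  # learnable scalar
        self.gamma = nn.Parameter(torch.ones(hidden_size))       # per-channel scale
        self.beta = nn.Parameter(torch.zeros(hidden_size))       # per-channel shift

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.gamma * torch.tanh(self.alpha * x) + self.beta
```

The full per-file diffs follow.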
{liger_kernel-0.5.4.dist-info → liger_kernel-0.5.6.dist-info}/METADATA:

````diff
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: liger_kernel
-Version: 0.5.4
+Version: 0.5.6
 Summary: Efficient Triton kernels for LLM Training
 License: BSD 2-CLAUSE LICENSE
 Copyright 2024 LinkedIn Corporation
````
````diff
@@ -45,6 +45,7 @@ Requires-Dist: datasets>=2.19.2; extra == "dev"
 Requires-Dist: seaborn; extra == "dev"
 Requires-Dist: mkdocs; extra == "dev"
 Requires-Dist: mkdocs-material; extra == "dev"
+Dynamic: license-file
 Dynamic: provides-extra
 Dynamic: requires-dist
 
````
````diff
@@ -115,6 +116,7 @@ Dynamic: requires-dist
 <details>
 <summary>Latest News 🔥</summary>
 
+- [2025/03/06] We release a joint blog post on TorchTune × Liger - [Peak Performance, Minimized Memory: Optimizing torchtune’s performance with torch.compile & Liger Kernel](https://pytorch.org/blog/peak-performance-minimized-memory/)
 - [2024/12/11] We release [v0.5.0](https://github.com/linkedin/Liger-Kernel/releases/tag/v0.5.0): 80% more memory efficient post training losses (DPO, ORPO, CPO, etc)!
 - [2024/12/5] We release LinkedIn Engineering Blog - [Liger-Kernel: Empowering an open source ecosystem of Triton Kernels for Efficient LLM Training](https://www.linkedin.com/blog/engineering/open-source/liger-kernel-open-source-ecosystem-for-efficient-llm-training)
 - [2024/11/6] We release [v0.4.0](https://github.com/linkedin/Liger-Kernel/releases/tag/v0.4.0): Full AMD support, Tech Report, Modal CI, Llama-3.2-Vision!
````
````diff
@@ -154,7 +156,7 @@ With one line of code, Liger Kernel can increase throughput by more than 20% and
 We provide optimized post training kernels like DPO, ORPO, SimPO, and more which can reduce memory usage by up to 80%. You can easily use them as python modules.
 
 ```python
-from liger_kernel.chunked_loss import
+from liger_kernel.chunked_loss import LigerFusedLinearORPOLoss
 orpo_loss = LigerFusedLinearORPOLoss()
 y = orpo_loss(lm_head.weight, x, target)
 ```
````
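For context on the snippet in this hunk: the chunked ORPO loss fuses the `lm_head` projection into the loss, so it takes the projection weight and pre-projection hidden states rather than logits. A hedged sketch with illustrative shapes (assuming, as the paired preference losses expect, that the first half of the batch holds the chosen sequences and the second half the rejected ones):

```python
import torch
from liger_kernel.chunked_loss import LigerFusedLinearORPOLoss

# Illustrative (small) sizes; real hidden/vocab dims are much larger.
B, T, H, V = 4, 16, 64, 128                   # batch = 2 * num_pairs (assumption)
lm_head = torch.nn.Linear(H, V, bias=False)
x = torch.randn(B, T, H, requires_grad=True)  # hidden states before lm_head
target = torch.randint(0, V, (B, T))          # token ids for both halves

orpo_loss = LigerFusedLinearORPOLoss()
y = orpo_loss(lm_head.weight, x, target)      # ORPO loss, computed chunk by chunk
```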
````diff
@@ -177,7 +179,7 @@ y = orpo_loss(lm_head.weight, x, target)
 - **Exact:** Computation is exact—no approximations! Both forward and backward passes are implemented with rigorous unit tests and undergo convergence testing against training runs without Liger Kernel to ensure accuracy.
 - **Lightweight:** Liger Kernel has minimal dependencies, requiring only Torch and Triton—no extra libraries needed! Say goodbye to dependency headaches!
 - **Multi-GPU supported:** Compatible with multi-GPU setups (PyTorch FSDP, DeepSpeed, DDP, etc.).
-- **Trainer Framework Integration**: [Axolotl](https://github.com/axolotl-ai-cloud/axolotl), [LLaMa-Factory](https://github.com/hiyouga/LLaMA-Factory), [SFTTrainer](https://github.com/huggingface/trl/releases/tag/v0.10.1), [Hugging Face Trainer](https://github.com/huggingface/transformers/pull/32860), [SWIFT](https://github.com/modelscope/ms-swift)
+- **Trainer Framework Integration**: [Axolotl](https://github.com/axolotl-ai-cloud/axolotl), [LLaMa-Factory](https://github.com/hiyouga/LLaMA-Factory), [SFTTrainer](https://github.com/huggingface/trl/releases/tag/v0.10.1), [Hugging Face Trainer](https://github.com/huggingface/transformers/pull/32860), [SWIFT](https://github.com/modelscope/ms-swift), [oumi](https://github.com/oumi-ai/oumi/tree/main)
 
 ## Installation
 
````
````diff
@@ -312,8 +314,10 @@ loss.backward()
 | Mixtral | `liger_kernel.transformers.apply_liger_kernel_to_mixtral` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
 | Gemma1 | `liger_kernel.transformers.apply_liger_kernel_to_gemma` | RoPE, RMSNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
 | Gemma2 | `liger_kernel.transformers.apply_liger_kernel_to_gemma2` | RoPE, RMSNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
+| Paligemma, Paligemma2, & Paligemma2 Mix | `liger_kernel.transformers.apply_liger_kernel_to_paligemma` | LayerNorm, RoPE, RMSNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
 | Qwen2, Qwen2.5, & QwQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
 | Qwen2-VL, & QVQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_vl` | RMSNorm, LayerNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
+| Qwen2.5-VL | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_5_vl` | RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
 | Phi3 & Phi3.5 | `liger_kernel.transformers.apply_liger_kernel_to_phi3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
 | Granite 3.0 & 3.1 | `liger_kernel.transformers.apply_liger_kernel_to_granite` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss |
 | OLMo2 | `liger_kernel.transformers.apply_liger_kernel_to_olmo2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
````
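The two added table rows correspond to the new `paligemma.py` and `qwen2_5_vl.py` model files in the summary above. These `apply_liger_kernel_to_*` entry points monkey-patch the Hugging Face modeling code in place, so they are called before the model is instantiated. A sketch for the new Qwen2.5-VL path (assuming a `transformers` version that ships Qwen2.5-VL; the checkpoint id is illustrative):

```python
import torch
from transformers import Qwen2_5_VLForConditionalGeneration
from liger_kernel.transformers import apply_liger_kernel_to_qwen2_5_vl

# Patch Qwen2.5-VL's modeling code (RMSNorm, SwiGLU, and the fused linear
# cross entropy listed in the table) before loading any weights.
apply_liger_kernel_to_qwen2_5_vl()

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2.5-VL-7B-Instruct",  # illustrative checkpoint
    torch_dtype=torch.bfloat16,
)
```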
````diff
@@ -385,8 +389,8 @@ loss.backward()
 ## Contact
 
 - For issues, create a Github ticket in this repository
-- For open discussion, join [our discord channel](https://discord.
-- For formal collaboration, send an email to yannchen@linkedin.com
+- For open discussion, join [our discord channel on GPUMode](https://discord.com/channels/1189498204333543425/1275130785933951039)
+- For formal collaboration, send an email to yannchen@linkedin.com and hning@linkedin.com
 
 ## Cite this work
 
````
````diff
@@ -405,7 +409,7 @@ Biblatex entry:
 ```
 
 ## Star History
-[![Star History Chart](https://api.star-history.com/svg?repos=linkedin/Liger-Kernel&type=Date)](https://star-history.com/#linkedin/Liger-Kernel&Date)
+[![Star History Chart](https://api.star-history.com/svg?repos=linkedin/Liger-Kernel&type=Date)](https://www.star-history.com/#linkedin/Liger-Kernel&Date)
 
 <p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
   <a href="#readme-top" style="text-decoration: none; color: #007bff; font-weight: bold;">
````
liger_kernel-0.5.6.dist-info/RECORD (new file):

````diff
@@ -0,0 +1,80 @@
+liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+liger_kernel/env_report.py,sha256=uhdEC8OydxoZlb7B6YYcAaBF3crGFdIck-4cxaW4NJY,1728
+liger_kernel/utils.py,sha256=178Hn8uD-VauDT6FjqMyXLbKLod8ObIpaTtapHwfEK0,1861
+liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EBU1LpWU,2248
+liger_kernel/chunked_loss/__init__.py,sha256=ATu-xX5Fc49Cr6yBOGBRNTo593ZrU5ZCsIuvoIbJWw4,603
+liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
+liger_kernel/chunked_loss/dpo_loss.py,sha256=xZwGqS04si9zXyob95SAdalC-hajZg8fWINqiqffN8k,5855
+liger_kernel/chunked_loss/functional.py,sha256=9G3nKm-Bi7uoZRFkL8wwGMl6juDl4bSzDvTa5GHZPzg,955
+liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=ooR-qnZCyWJN935oHCSWLaKKKyaYERyhNczRGi1VOiw,11935
+liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=-E4AuWY-y2bMo_kAmEQBgQ92UJh3L5IiCRGVcfMJOCE,12731
+liger_kernel/chunked_loss/fused_linear_preference.py,sha256=ojB42jYPu0c4ki96Ft-hy7Sf6fh_WikG-aWNrlZzSio,18362
+liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=RiuK3UtRwH9T6jZ36sA8Urj-TVuOLOO2syLg_JOQapY,13437
+liger_kernel/chunked_loss/grpo_loss.py,sha256=6Mb4ZT6MfnOr4Xo681rMR0LKkhzJhInvQp8wp2YVMK0,8913
+liger_kernel/chunked_loss/jsd_loss.py,sha256=u2ahkuHsbhpNaKcpBCz5gCMDk9ou-P04DHji592dIBo,7067
+liger_kernel/chunked_loss/kto_loss.py,sha256=llVCe6DkcpCo57seGWoMikaQVFApx764jsmSbQyqwQY,7529
+liger_kernel/chunked_loss/orpo_loss.py,sha256=nu9UYG16dcMw93lvHi4_hYs3Q0FK1KnlmMRj7OpYU8s,4872
+liger_kernel/chunked_loss/simpo_loss.py,sha256=fy2w8KbhMrBv7b1jdIeH3bBFxY52bPQPZb3KwBvmurM,5385
+liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+liger_kernel/ops/cross_entropy.py,sha256=T5oSsqOS1y-Iea5o9v_BSU-_mIEXqWAT1oX_m59NcA4,18941
+liger_kernel/ops/dyt.py,sha256=YD1-buHz9VmIX838VKzLc-lm5CeUQ4LAskGDWBUMQHA,6187
+liger_kernel/ops/fused_linear_cross_entropy.py,sha256=1Y3Uk_TCSjqKgoG2eot1ptnWXJXXQESqGvOmqAW1gsM,10912
+liger_kernel/ops/fused_linear_jsd.py,sha256=CSoprxb-YcJy-YUKiTcYkxN8sb9h2kdk_iHuncvSV5c,9683
+liger_kernel/ops/geglu.py,sha256=axGvCIvlBzuluoAIrWTsp2iZM4BFKNInkPov8YVvH9E,4126
+liger_kernel/ops/group_norm.py,sha256=qD4D4lSjSgVtO52EBNLC2iTseALRgPgqXE50U2woggk,10837
+liger_kernel/ops/jsd.py,sha256=rkloGA7nDfVaa5nKY6-EYBw0E1p_MSsl4fr2xZGTp04,6961
+liger_kernel/ops/kl_div.py,sha256=NkG7D6_DnPBzr-ohhYiQbRBnq_fbGmpn5UU7y0UBKQo,8420
+liger_kernel/ops/layer_norm.py,sha256=6roQjioyg-9O2qLPV8nL4U0-5UH80tdzOMTWwjvDnn8,7961
+liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
+liger_kernel/ops/rms_norm.py,sha256=PWLJcdIKU5e-8BuYFHd9Cqlq6wmr6fUXKi9zQD4LetU,11727
+liger_kernel/ops/rope.py,sha256=ofmBOkUpZZO-Q8Z5B_LOFYYLD-YT-8WnJ4vGOrDYouI,8943
+liger_kernel/ops/swiglu.py,sha256=KmgMjaJQnbLLgZn2nEpbwHU_xpnYRweCyrLQSVvM1vA,3015
+liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
+liger_kernel/ops/utils.py,sha256=uoFKQqo-34N2TWQNvXMFywqGiOMMXNEVBxVojzlUAa0,3836
+liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
+liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
+liger_kernel/transformers/__init__.py,sha256=t70gqygxH63iz-B0MOdZx4AEgA8MfqU1G7N6dvIneCY,2618
+liger_kernel/transformers/auto_model.py,sha256=0qCTRZt280Bj_LcFdzo9hlaR-BWNazawXOGgoCZjgEg,1545
+liger_kernel/transformers/cross_entropy.py,sha256=z3KTWQnFxr_IZaVjtYt0ZNEWQdDdYThN35xWkHlDGH0,1683
+liger_kernel/transformers/dyt.py,sha256=QMqqc14pkE0WhpRZvapfnNAun-6C0C_tHExL2ZJuCUA,648
+liger_kernel/transformers/functional.py,sha256=4h9Pdx_iINBqfv2Zod_c27qOpYXDDwbdVgatQ9_XBmI,5089
+liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=09Rt7FZzLH42VOcIbQ4dlQd0o3Rlb4vk6fqiOQ7WTD8,1778
+liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
+liger_kernel/transformers/geglu.py,sha256=mrgqzIUVd6lN7fkDKLkw5YaESDxDtFgbot430WwPVOQ,1107
+liger_kernel/transformers/group_norm.py,sha256=6qMAWOprr4SzP0YhNVNGQIBpM5aUHplUD2VuGJrMBz0,2173
+liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
+liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
+liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
+liger_kernel/transformers/monkey_patch.py,sha256=95afvIrZA9xSWLNIJspBLbz8lxv2Y5gfZke7MyqoOX8,56965
+liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
+liger_kernel/transformers/rms_norm.py,sha256=GqCEJuGt0YdqqlMcToE0Wp4A8YFquDa4UUSyH2uFW2A,1191
+liger_kernel/transformers/rope.py,sha256=ZTrTORSAyfcFIKjk6XEeYmk4ROH7xXED9L4g2NFntlE,999
+liger_kernel/transformers/swiglu.py,sha256=i9WTqcNRqReU4XJs391IPbl-I5X0wG4T72D4pqGFfJg,2422
+liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
+liger_kernel/transformers/tvd.py,sha256=XrRfyJIqN6HFxXk8MYyFVZM1OLz3mtSbRZvWfZ_JerQ,450
+liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1wcRgDSyjBMv5i1a7BrDPDQw,881
+liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+liger_kernel/transformers/model/gemma.py,sha256=7cBTljzh-8_ACBhYl6NUfj5_ux92YRlmnAU5gfDAQAI,9312
+liger_kernel/transformers/model/gemma2.py,sha256=X0FOIhvFlTrmWI7Ws06wUkutgHW3lWtLOnnHp1NgZ3A,10403
+liger_kernel/transformers/model/llama.py,sha256=d9rBaK8e8RSMCFHdgom9ZHuXOlnh6U_o-GkAFGRNGOY,9989
+liger_kernel/transformers/model/llava.py,sha256=b0pEagjUbu2-eS9xegjyfl1DwIXLwZcNpff55ibaMbA,17601
+liger_kernel/transformers/model/loss_utils.py,sha256=Z-fUrf-cUDUjUIH7Tl9OL2hT8nmtx7ES3kg8syuWKy4,1476
+liger_kernel/transformers/model/mistral.py,sha256=o7tyl1sPWPfZwwrBLRlryHlSI8I55viuJoMI5Bh5Nww,5014
+liger_kernel/transformers/model/mixtral.py,sha256=T0ITv2-PkR8VErVOVUizoS4EzjmARyR7GFh0tXDB_i4,11089
+liger_kernel/transformers/model/mllama.py,sha256=RCKtwnGOMFYIbtt1zUQ15Cyv4eNpHkTWcgkmG2EEs2I,10804
+liger_kernel/transformers/model/olmo2.py,sha256=5M8kczp4D-jvbjcV7cKATIJGF34xd-Rs-PPdKZWSIlY,4685
+liger_kernel/transformers/model/paligemma.py,sha256=GNReT6tVZt3ON6aaa9ovg8mnu1hYocSx9OhgC7b-_28,19191
+liger_kernel/transformers/model/phi3.py,sha256=NmU2DuU1Huwha6K7YSsJCnvQfUovTTGlsfBZhbx0UoI,9951
+liger_kernel/transformers/model/qwen2.py,sha256=t7NotBHoebsPqNSxwaf9DXTg8jxgB5BdunSGqYOE0hQ,9240
+liger_kernel/transformers/model/qwen2_5_vl.py,sha256=70BnHZjx6eQWTwi3zc5SMwxTeOOA4Tbdkfy6IYRcTaM,9289
+liger_kernel/transformers/model/qwen2_vl.py,sha256=zo4O9fShNHYqSLrzLGqQYWSMtJI6UHaSY7zvMCYWyD8,9685
+liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
+liger_kernel/transformers/trainer/orpo_trainer.py,sha256=pdekW7l6Qg_aqa5SYKYlSWUF8m3lkOFvFLcIMEHrz9s,8338
+liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
+liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
+liger_kernel-0.5.6.dist-info/licenses/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
+liger_kernel-0.5.6.dist-info/licenses/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
+liger_kernel-0.5.6.dist-info/METADATA,sha256=yam1-5oz74ok_T_rVfn3RLvCDXPxDfXZpChC1PVTFoY,23002
+liger_kernel-0.5.6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+liger_kernel-0.5.6.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
+liger_kernel-0.5.6.dist-info/RECORD,,
````
liger_kernel/chunked_loss/fused_linear_rlhf.py (removed):

````diff
@@ -1,213 +0,0 @@
-from functools import partial
-
-import torch
-import torch.nn.functional as F
-
-
-class LigerFusedLinearRLHFBase(torch.autograd.Function):
-    @staticmethod
-    def forward(
-        ctx,
-        _input,
-        weight,
-        attention_mask,
-        rewards,
-        bias=None,
-        loss_fn=None,
-        num_generations=4,
-        beta=0.1,
-        compiled=True,
-        use_ref_model=False,
-        ref_input=None,
-        ref_weight=None,
-        ref_bias=None,
-    ):
-        """Chunked forward pass for RLHF loss computation."""
-        # Save for backward
-        ctx.beta = beta
-        ctx.rewards = rewards
-
-        # Initialize accumulators
-        loss_acc = torch.zeros((), device=_input.device)
-        grad_weight = torch.zeros_like(weight)  # [V, H]
-        grad_inputs = []
-        grad_bias = torch.zeros_like(bias) if bias is not None else None  # [V]
-        aggregated_metrics = []
-
-        # Create a partial function with fixed arguments
-        compute_loss = partial(
-            LigerFusedLinearRLHFBase._compute_chunk_loss,
-            beta=beta,
-            use_ref_model=use_ref_model,
-            ref_weight=ref_weight,
-            ref_bias=ref_bias,
-            rlhf_loss_fn=loss_fn,
-        )
-
-        def fused_fwd_bwd(input_chunk, attention_mask_chunk, rewards_chunk, ref_input_chunk):
-            """Fused forward and backward for a chunk."""
-            if bias is not None:
-                return torch.func.grad_and_value(compute_loss, argnums=(0, 1, 5), has_aux=True)(
-                    input_chunk,  # arg 0
-                    weight,  # arg 1
-                    attention_mask_chunk,  # arg 2
-                    rewards_chunk,  # arg 3
-                    ref_input_chunk,  # arg 4
-                    bias,  # arg 5
-                )
-            else:
-                return torch.func.grad_and_value(compute_loss, argnums=(0, 1), has_aux=True)(
-                    input_chunk,  # arg 0
-                    weight,  # arg 1
-                    attention_mask_chunk,  # arg 2
-                    rewards_chunk,  # arg 3
-                    ref_input_chunk,  # arg 4
-                )
-
-        def accumulate_chunk(input_chunk, attention_mask_chunk, rewards_chunk, ref_input_chunk=None):
-            if bias is not None:
-                (chunk_grad_input, chunk_grad_weight, chunk_grad_bias), (chunk_loss, chunk_metrics) = fused_fwd_bwd(
-                    input_chunk, attention_mask_chunk, rewards_chunk, ref_input_chunk
-                )
-                grad_bias.add_(chunk_grad_bias)
-            else:
-                (chunk_grad_input, chunk_grad_weight), (chunk_loss, chunk_metrics) = fused_fwd_bwd(
-                    input_chunk, attention_mask_chunk, rewards_chunk, ref_input_chunk
-                )
-
-            # Accumulate gradients and loss
-            grad_weight.add_(chunk_grad_weight)
-            grad_inputs.append(chunk_grad_input)
-            loss_acc.add_(chunk_loss)
-
-            # Initialize storage for metrics on first chunk
-            if len(aggregated_metrics) == 0:
-                for metric in chunk_metrics:
-                    if metric.ndim == 0:
-                        aggregated_metrics.append(torch.zeros((), device=metric.device))
-                    else:
-                        aggregated_metrics.append([])
-
-            # Accumulate metrics
-            for i, metric in enumerate(chunk_metrics):
-                if metric.ndim == 0:
-                    aggregated_metrics[i].add_(metric)
-                else:
-                    aggregated_metrics[i].append(metric)
-
-        if compiled:
-            accumulate_chunk = torch.compile(accumulate_chunk)
-
-        # Process input in chunks
-        chunks = max(1, _input.shape[0] // num_generations)
-        _input_chunks = torch.chunk(_input, chunks=chunks, dim=0)
-        _attention_mask_chunks = torch.chunk(attention_mask, chunks=chunks, dim=0)
-        _rewards_chunks = torch.chunk(rewards, chunks=chunks, dim=0)
-        _ref_input_chunks = torch.chunk(ref_input, chunks=chunks, dim=0) if use_ref_model else [None] * chunks
-
-        for input_chunk, attention_mask_chunk, rewards_chunk, ref_input_chunk in zip(
-            _input_chunks, _attention_mask_chunks, _rewards_chunks, _ref_input_chunks
-        ):
-            # Mark dynamic dimensions
-            torch._dynamo.mark_dynamic(input_chunk, 1)
-            torch._dynamo.mark_dynamic(attention_mask_chunk, 1)
-            if ref_input_chunk is not None:
-                torch._dynamo.mark_dynamic(ref_input_chunk, 1)
-
-            accumulate_chunk(input_chunk, attention_mask_chunk, rewards_chunk, ref_input_chunk)
-
-        # Scale accumulated loss by number of chunks since we're averaging
-        loss_acc = loss_acc / chunks
-
-        # Combine gradients
-        grad_input = torch.cat(grad_inputs, dim=0)
-
-        # Save for backward
-        ctx.save_for_backward(grad_input, grad_weight, grad_bias)
-
-        # Finalize metrics
-        final_metrics = []
-        for metric in aggregated_metrics:
-            if isinstance(metric, list):
-                final_metrics.append(torch.cat(metric, dim=0))
-            else:
-                final_metrics.append(metric / chunks)
-
-        return loss_acc, tuple(final_metrics)
-
-    @staticmethod
-    def _compute_chunk_loss(
-        input_chunk,
-        weight,
-        attention_mask_chunk,
-        rewards_chunk,
-        ref_input_chunk=None,
-        bias=None,
-        beta=0.1,
-        use_ref_model=False,
-        ref_weight=None,
-        ref_bias=None,
-        rlhf_loss_fn=None,
-    ):
-        """Compute loss for a single chunk."""
-        # Get policy log probabilities using chunk_forward
-        log_probs, _, logits_mean = LigerFusedLinearRLHFBase.chunk_forward(input_chunk, weight, bias=bias)
-
-        # Get reference log probabilities if needed
-        ref_log_probs = None
-        if use_ref_model and ref_input_chunk is not None:
-            with torch.no_grad():
-                ref_log_probs, _, _ = LigerFusedLinearRLHFBase.chunk_forward(ref_input_chunk, ref_weight, bias=ref_bias)
-
-        # Compute chunk loss and metrics using the provided loss function
-        chunk_loss, chunk_metrics = rlhf_loss_fn(
-            log_probs=log_probs,
-            attention_mask=attention_mask_chunk,
-            rewards=rewards_chunk,
-            ref_log_probs=ref_log_probs,
-            beta=beta,
-        )
-
-        return chunk_loss, (logits_mean, *chunk_metrics)
-
-    @staticmethod
-    def chunk_forward(input_chunk, weight, bias=None):
-        """Forward pass computation for a single chunk without explicit reshaping."""
-        # Directly compute logits via batched matrix multiplication: [B, T, H] @ [H, V] -> [B, T, V]
-        logits = torch.matmul(input_chunk, weight.t())
-        if bias is not None:
-            logits = logits + bias  # Broadcasts bias to [B, T, V]
-
-        # Compute log probabilities using softmax over the last dimension
-        log_probs = F.log_softmax(logits.float(), dim=-1)
-
-        # Monitoring: compute mean of logits
-        batch_size, seq_len, _ = input_chunk.shape
-        logits_mean = logits.sum() / (batch_size * seq_len * weight.shape[0])
-        return log_probs, logits, logits_mean
-
-    @staticmethod
-    def backward(ctx, grad_output, *grad_metrics):
-        """Backward pass for RLHF loss."""
-        grad_input, grad_weight, grad_bias = ctx.saved_tensors
-        if grad_output != 1.0:
-            grad_input = grad_input * grad_output
-            grad_weight = grad_weight * grad_output
-            if grad_bias is not None:
-                grad_bias = grad_bias * grad_output
-
-        return (
-            grad_input,
-            grad_weight,
-            None,  # grad_attention_mask
-            None,  # grad_rewards
-            grad_bias,
-            None,  # grad_loss_fn
-            None,  # grad_chunk_size
-            None,  # grad_beta
-            None,  # grad_compiled
-            None,  # grad_use_ref_model
-            None,  # grad_ref_input
-            None,  # grad_ref_weight
-            None,  # grad_ref_bias
-        )
````
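This removed base class is superseded by the new `liger_kernel/chunked_loss/fused_linear_ppo.py` (+331 in the summary above). Its core trick is computing per-chunk gradients eagerly with `torch.func.grad_and_value` and accumulating them, so full-vocabulary logits never materialize for the whole batch at once. A self-contained sketch of that pattern (the loss and shapes are stand-ins, not the library's RLHF loss):

```python
import torch

def chunk_loss(input_chunk, weight):
    # [chunk, T, H] @ [H, V] -> [chunk, T, V]; only this chunk's logits exist.
    logits = input_chunk @ weight.t()
    log_probs = torch.log_softmax(logits.float(), dim=-1)
    loss = -log_probs.mean()          # stand-in for the real chunk loss
    return loss, log_probs.detach()   # (output, aux), as has_aux=True expects

B, T, H, V = 8, 16, 32, 64            # illustrative sizes
x = torch.randn(B, T, H)
w = torch.randn(V, H)

fwd_bwd = torch.func.grad_and_value(chunk_loss, argnums=(0, 1), has_aux=True)

grad_w = torch.zeros_like(w)
grad_inputs, loss_acc = [], torch.zeros(())
chunks = 4
for x_chunk in torch.chunk(x, chunks=chunks, dim=0):
    # grad_and_value(..., has_aux=True) returns ((grads...), (loss, aux)).
    (gx, gw), (loss, _aux) = fwd_bwd(x_chunk, w)
    grad_inputs.append(gx)
    grad_w.add_(gw)
    loss_acc.add_(loss)

grad_x = torch.cat(grad_inputs, dim=0)
loss = loss_acc / chunks              # averaged over chunks, as the removed code did
```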
liger_kernel-0.5.4.dist-info/RECORD (removed):

````diff
@@ -1,74 +0,0 @@
-liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-liger_kernel/env_report.py,sha256=uhdEC8OydxoZlb7B6YYcAaBF3crGFdIck-4cxaW4NJY,1728
-liger_kernel/utils.py,sha256=FtVUkCGBT1UNasTl6HMNycWwiwHayK6tx-ZDdA-sNX4,1884
-liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EBU1LpWU,2248
-liger_kernel/chunked_loss/__init__.py,sha256=ATu-xX5Fc49Cr6yBOGBRNTo593ZrU5ZCsIuvoIbJWw4,603
-liger_kernel/chunked_loss/cpo_loss.py,sha256=OdBR8WYdHTKpLI_c9DcuwqKSWPeAAeTyREz46Vu_cAY,3682
-liger_kernel/chunked_loss/dpo_loss.py,sha256=wgjnwzLfrMUwV5mXgrq6G1YfQKWnbiFJegaP48BGJHY,4509
-liger_kernel/chunked_loss/functional.py,sha256=THWWpCnRVhTVfnPnyvQjdBvo1JDtxhwLmtZE_yiBBqM,817
-liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=5V8rdva89WyHVbmJ8JOmC4DYNOR6ByXfx3qlUieOZkI,11002
-liger_kernel/chunked_loss/fused_linear_preference.py,sha256=idK9V9NivoVITqVpiG0fEGUHSvinYWkn9-EYXZjR-KQ,18356
-liger_kernel/chunked_loss/fused_linear_rlhf.py,sha256=sAApL4GQ3YL2F-ymIAF61GCpFfBgFcWF5LB4Gzd7LgY,8044
-liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=ZqYlXXhIphkJPxOS7iI70avgrr6x0skEtgpckZTYau0,9819
-liger_kernel/chunked_loss/grpo_loss.py,sha256=M5qlQR-v5Rh8N3P3dPGNhOKygDFJ4516_rJaVPzU_-c,4980
-liger_kernel/chunked_loss/jsd_loss.py,sha256=yRCQdvd3ruTWP4A_BfU8VcZ6LepSUfO0Ob7stGnueQY,6052
-liger_kernel/chunked_loss/kto_loss.py,sha256=b3ffJyk97e-6XdXd4HFrYyx8wW4A-CU4gOaJSimKLtA,5476
-liger_kernel/chunked_loss/orpo_loss.py,sha256=yjcrrbVeemLYodoSKT-FMSnaPtyKAZ3aOrvPD6tTY6Y,3617
-liger_kernel/chunked_loss/simpo_loss.py,sha256=3TTc7U79Orjgi-Wu81WZkWk5MgsdqKXIOBHgIvDazPw,3865
-liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-liger_kernel/ops/cross_entropy.py,sha256=D6vFFloiuxFXoWfjlIjmfO3tVaWOiYmztw9FKAi5vdU,18608
-liger_kernel/ops/fused_linear_cross_entropy.py,sha256=1Y3Uk_TCSjqKgoG2eot1ptnWXJXXQESqGvOmqAW1gsM,10912
-liger_kernel/ops/fused_linear_jsd.py,sha256=Seshez2qaM6HiTQ8_HEqSwhaeVruNT1SvIM4ZrAPBEU,9602
-liger_kernel/ops/geglu.py,sha256=axGvCIvlBzuluoAIrWTsp2iZM4BFKNInkPov8YVvH9E,4126
-liger_kernel/ops/group_norm.py,sha256=qD4D4lSjSgVtO52EBNLC2iTseALRgPgqXE50U2woggk,10837
-liger_kernel/ops/jsd.py,sha256=0jNeRxpcNI5ckxCdoCNyO5GEedLIuzx3lz6KAiksc4o,6109
-liger_kernel/ops/kl_div.py,sha256=MnfuYqqQESON1X2Swy064x1urKtMFdgeSWd60VttBXI,8420
-liger_kernel/ops/layer_norm.py,sha256=6roQjioyg-9O2qLPV8nL4U0-5UH80tdzOMTWwjvDnn8,7961
-liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
-liger_kernel/ops/rms_norm.py,sha256=PWLJcdIKU5e-8BuYFHd9Cqlq6wmr6fUXKi9zQD4LetU,11727
-liger_kernel/ops/rope.py,sha256=ofmBOkUpZZO-Q8Z5B_LOFYYLD-YT-8WnJ4vGOrDYouI,8943
-liger_kernel/ops/swiglu.py,sha256=KmgMjaJQnbLLgZn2nEpbwHU_xpnYRweCyrLQSVvM1vA,3015
-liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
-liger_kernel/ops/utils.py,sha256=uoFKQqo-34N2TWQNvXMFywqGiOMMXNEVBxVojzlUAa0,3836
-liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
-liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
-liger_kernel/transformers/__init__.py,sha256=6v_VcV1GQ9ISgNCd-ZxtmEg_s5GTBQ9F-s1KrFkYzPQ,2265
-liger_kernel/transformers/auto_model.py,sha256=0qCTRZt280Bj_LcFdzo9hlaR-BWNazawXOGgoCZjgEg,1545
-liger_kernel/transformers/cross_entropy.py,sha256=z3KTWQnFxr_IZaVjtYt0ZNEWQdDdYThN35xWkHlDGH0,1683
-liger_kernel/transformers/functional.py,sha256=ShLD3eb--XKNtllznCrOYTbo4f-1KVwzi0KLMICdrn4,4942
-liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=09Rt7FZzLH42VOcIbQ4dlQd0o3Rlb4vk6fqiOQ7WTD8,1778
-liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
-liger_kernel/transformers/geglu.py,sha256=mrgqzIUVd6lN7fkDKLkw5YaESDxDtFgbot430WwPVOQ,1107
-liger_kernel/transformers/group_norm.py,sha256=6qMAWOprr4SzP0YhNVNGQIBpM5aUHplUD2VuGJrMBz0,2173
-liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
-liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
-liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
-liger_kernel/transformers/monkey_patch.py,sha256=g3i3q5McBg23A3Mnviw-Eb32le1hvN7jByzONa9ngcs,44000
-liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
-liger_kernel/transformers/rms_norm.py,sha256=GqCEJuGt0YdqqlMcToE0Wp4A8YFquDa4UUSyH2uFW2A,1191
-liger_kernel/transformers/rope.py,sha256=ZTrTORSAyfcFIKjk6XEeYmk4ROH7xXED9L4g2NFntlE,999
-liger_kernel/transformers/swiglu.py,sha256=i9WTqcNRqReU4XJs391IPbl-I5X0wG4T72D4pqGFfJg,2422
-liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
-liger_kernel/transformers/tvd.py,sha256=XrRfyJIqN6HFxXk8MYyFVZM1OLz3mtSbRZvWfZ_JerQ,450
-liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1wcRgDSyjBMv5i1a7BrDPDQw,881
-liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-liger_kernel/transformers/model/gemma.py,sha256=ky89b3aWPaeTGRMC-745KgixtQIRXzNAiCORAMLn9yo,9654
-liger_kernel/transformers/model/gemma2.py,sha256=27NcoZjEqP9Lqb4Wf0EKqTbr2HTGiHPhrVyPCRGPz6s,10767
-liger_kernel/transformers/model/llama.py,sha256=3LJFXKFDKvEakaWPc_NicSFst4Y_hdSMrdl1UDK1EcA,10330
-liger_kernel/transformers/model/mistral.py,sha256=MVRksI5_j_8WJu8znOHKCdSI5jSu-S7cdFYzt9m_vIQ,5180
-liger_kernel/transformers/model/mixtral.py,sha256=jpZJkpl625Q-JHWarj2MqT5mRaSsiCtg0c9vVyvOdCY,11430
-liger_kernel/transformers/model/mllama.py,sha256=qWexBdskuN3gPJvPUwt4J0nU675tGD6W7wxgRZ9Bifg,11145
-liger_kernel/transformers/model/olmo2.py,sha256=yyksS6E4fuWd8asEW8rEDBKqZpFmP4ITCM_bjIDZaoY,5124
-liger_kernel/transformers/model/phi3.py,sha256=biRa8fph9qdnQmkD9I21t5XIjpIt1i6UKU4uk8Up8pU,10292
-liger_kernel/transformers/model/qwen2.py,sha256=14UuPjxB-tjqWn85Tn4fqBFvVhVsth5iPEt8kJSMiew,9581
-liger_kernel/transformers/model/qwen2_vl.py,sha256=yMLqsfSYcvhClUpTUjGoADiOxfLB2B8240VdrPP0c8s,9851
-liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
-liger_kernel/transformers/trainer/orpo_trainer.py,sha256=pdekW7l6Qg_aqa5SYKYlSWUF8m3lkOFvFLcIMEHrz9s,8338
-liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
-liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
-liger_kernel-0.5.4.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
-liger_kernel-0.5.4.dist-info/METADATA,sha256=Zw7n3Ey6vUed4E54H9-TzKmhuOpd9P2ZFMVL-zYUnew,22255
-liger_kernel-0.5.4.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
-liger_kernel-0.5.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-liger_kernel-0.5.4.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
-liger_kernel-0.5.4.dist-info/RECORD,,
````
{liger_kernel-0.5.4.dist-info → liger_kernel-0.5.6.dist-info/licenses}/LICENSE: file without changes
{liger_kernel-0.5.4.dist-info → liger_kernel-0.5.6.dist-info/licenses}/NOTICE: file without changes
{liger_kernel-0.5.4.dist-info → liger_kernel-0.5.6.dist-info}/top_level.txt: file without changes