liger-kernel-nightly 0.5.5.dev20250402185702__py3-none-any.whl → 0.6.4.dev20260112233432__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of liger-kernel-nightly might be problematic. Click here for more details.
- liger_kernel/chunked_loss/__init__.py +1 -0
- liger_kernel/chunked_loss/cosine_similarity_loss.py +142 -0
- liger_kernel/chunked_loss/dpo_loss.py +61 -3
- liger_kernel/chunked_loss/functional.py +2 -0
- liger_kernel/chunked_loss/fused_linear_distillation.py +23 -5
- liger_kernel/chunked_loss/fused_linear_ppo.py +36 -0
- liger_kernel/chunked_loss/fused_linear_preference.py +0 -1
- liger_kernel/chunked_loss/grpo_loss.py +76 -5
- liger_kernel/chunked_loss/jsd_loss.py +46 -15
- liger_kernel/ops/__init__.py +141 -0
- liger_kernel/ops/backends/README.md +151 -0
- liger_kernel/ops/backends/__init__.py +13 -0
- liger_kernel/ops/backends/_ascend/__init__.py +5 -0
- liger_kernel/ops/backends/_ascend/ascend-ub-manager-design.md +485 -0
- liger_kernel/ops/backends/_ascend/ops/__init__.py +49 -0
- liger_kernel/ops/backends/_ascend/ops/geglu.py +266 -0
- liger_kernel/ops/backends/_ascend/ops/qwen2vl_mrope.py +285 -0
- liger_kernel/ops/backends/_ascend/ops/rope.py +290 -0
- liger_kernel/ops/backends/_ascend/ops/swiglu.py +142 -0
- liger_kernel/ops/backends/_ascend/ops/tvd.py +221 -0
- liger_kernel/ops/backends/_ascend/ub_manager.py +349 -0
- liger_kernel/ops/backends/registry.py +61 -0
- liger_kernel/ops/cross_entropy.py +134 -65
- liger_kernel/ops/dyt.py +115 -180
- liger_kernel/ops/fused_add_rms_norm.py +416 -0
- liger_kernel/ops/fused_linear_cross_entropy.py +117 -23
- liger_kernel/ops/fused_neighborhood_attention.py +1022 -0
- liger_kernel/ops/geglu.py +6 -4
- liger_kernel/ops/group_norm.py +7 -7
- liger_kernel/ops/grpo_loss.py +312 -0
- liger_kernel/ops/jsd.py +2 -1
- liger_kernel/ops/kl_div.py +9 -5
- liger_kernel/ops/layer_norm.py +146 -78
- liger_kernel/ops/llama4_rope.py +225 -0
- liger_kernel/ops/multi_token_attention.py +207 -0
- liger_kernel/ops/poly_norm.py +390 -0
- liger_kernel/ops/rms_norm.py +398 -99
- liger_kernel/ops/rope.py +1 -1
- liger_kernel/ops/softmax.py +201 -0
- liger_kernel/ops/sparsemax.py +179 -0
- liger_kernel/ops/swiglu.py +1 -1
- liger_kernel/ops/tiled_mlp.py +136 -0
- liger_kernel/ops/utils.py +14 -0
- liger_kernel/transformers/__init__.py +208 -17
- liger_kernel/transformers/auto_model.py +21 -0
- liger_kernel/transformers/cross_entropy.py +9 -4
- liger_kernel/transformers/dyt.py +6 -4
- liger_kernel/transformers/experimental/__init__.py +5 -0
- liger_kernel/transformers/experimental/embedding.py +1 -1
- liger_kernel/transformers/fsdp.py +55 -0
- liger_kernel/transformers/functional.py +122 -20
- liger_kernel/transformers/fused_add_rms_norm.py +39 -0
- liger_kernel/transformers/fused_linear_cross_entropy.py +16 -5
- liger_kernel/transformers/fused_linear_jsd.py +1 -1
- liger_kernel/transformers/fused_neighborhood_attention.py +234 -0
- liger_kernel/transformers/geglu.py +1 -1
- liger_kernel/transformers/group_norm.py +1 -1
- liger_kernel/transformers/grpo_loss.py +153 -0
- liger_kernel/transformers/jsd.py +1 -1
- liger_kernel/transformers/kl_div.py +1 -1
- liger_kernel/transformers/layer_norm.py +1 -1
- liger_kernel/transformers/llama4_rope.py +93 -0
- liger_kernel/transformers/model/exaone4.py +136 -0
- liger_kernel/transformers/model/falcon_h1.py +122 -0
- liger_kernel/transformers/model/gemma.py +57 -27
- liger_kernel/transformers/model/gemma2.py +65 -28
- liger_kernel/transformers/model/gemma3.py +331 -0
- liger_kernel/transformers/model/glm4.py +141 -0
- liger_kernel/transformers/model/glm4v.py +163 -0
- liger_kernel/transformers/model/glm4v_moe.py +172 -0
- liger_kernel/transformers/model/gpt_oss.py +211 -0
- liger_kernel/transformers/model/hunyuan_v1.py +134 -0
- liger_kernel/transformers/model/internvl.py +157 -0
- liger_kernel/transformers/model/llama.py +109 -27
- liger_kernel/transformers/model/llama4.py +121 -0
- liger_kernel/transformers/model/llava.py +111 -136
- liger_kernel/transformers/model/loss_utils.py +50 -12
- liger_kernel/transformers/model/mistral.py +51 -34
- liger_kernel/transformers/model/mixtral.py +50 -29
- liger_kernel/transformers/model/mllama.py +46 -24
- liger_kernel/transformers/model/olmo2.py +47 -22
- liger_kernel/transformers/model/olmo3.py +142 -0
- liger_kernel/transformers/model/output_classes.py +147 -0
- liger_kernel/transformers/model/paligemma.py +50 -14
- liger_kernel/transformers/model/phi3.py +47 -172
- liger_kernel/transformers/model/qwen2.py +55 -23
- liger_kernel/transformers/model/qwen2_5_vl.py +62 -103
- liger_kernel/transformers/model/qwen2_vl.py +59 -108
- liger_kernel/transformers/model/qwen3.py +136 -0
- liger_kernel/transformers/model/qwen3_moe.py +152 -0
- liger_kernel/transformers/model/qwen3_next.py +146 -0
- liger_kernel/transformers/model/qwen3_vl.py +150 -0
- liger_kernel/transformers/model/qwen3_vl_moe.py +126 -0
- liger_kernel/transformers/model/smollm3.py +199 -0
- liger_kernel/transformers/model/smolvlm.py +158 -0
- liger_kernel/transformers/monkey_patch.py +2018 -244
- liger_kernel/transformers/multi_token_attention.py +64 -0
- liger_kernel/transformers/poly_norm.py +42 -0
- liger_kernel/transformers/qwen2vl_mrope.py +1 -1
- liger_kernel/transformers/rms_norm.py +54 -6
- liger_kernel/transformers/rope.py +45 -1
- liger_kernel/transformers/softmax.py +12 -0
- liger_kernel/transformers/sparsemax.py +16 -0
- liger_kernel/transformers/swiglu.py +39 -1
- liger_kernel/transformers/tiled_mlp.py +125 -0
- liger_kernel/transformers/trainer/orpo_trainer.py +1 -53
- liger_kernel/transformers/tvd.py +1 -1
- liger_kernel/utils.py +63 -0
- {liger_kernel_nightly-0.5.5.dev20250402185702.dist-info → liger_kernel_nightly-0.6.4.dev20260112233432.dist-info}/METADATA +73 -39
- liger_kernel_nightly-0.6.4.dev20260112233432.dist-info/RECORD +132 -0
- liger_kernel_nightly-0.5.5.dev20250402185702.dist-info/RECORD +0 -80
- {liger_kernel_nightly-0.5.5.dev20250402185702.dist-info → liger_kernel_nightly-0.6.4.dev20260112233432.dist-info}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.5.dev20250402185702.dist-info → liger_kernel_nightly-0.6.4.dev20260112233432.dist-info}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.5.dev20250402185702.dist-info → liger_kernel_nightly-0.6.4.dev20260112233432.dist-info}/WHEEL +0 -0
- {liger_kernel_nightly-0.5.5.dev20250402185702.dist-info → liger_kernel_nightly-0.6.4.dev20260112233432.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: liger_kernel_nightly
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.6.4.dev20260112233432
|
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
|
@@ -33,18 +33,18 @@ License-File: NOTICE
|
|
|
33
33
|
Requires-Dist: torch>=2.1.2
|
|
34
34
|
Requires-Dist: triton>=2.3.1
|
|
35
35
|
Provides-Extra: dev
|
|
36
|
-
Requires-Dist: transformers>=4.
|
|
36
|
+
Requires-Dist: transformers>=4.49.0; extra == "dev"
|
|
37
37
|
Requires-Dist: matplotlib>=3.7.2; extra == "dev"
|
|
38
|
-
Requires-Dist:
|
|
39
|
-
Requires-Dist: black>=24.4.2; extra == "dev"
|
|
40
|
-
Requires-Dist: isort>=5.13.2; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.12.0; extra == "dev"
|
|
41
39
|
Requires-Dist: pytest>=7.1.2; extra == "dev"
|
|
42
40
|
Requires-Dist: pytest-xdist; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
42
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
43
43
|
Requires-Dist: pytest-rerunfailures; extra == "dev"
|
|
44
44
|
Requires-Dist: datasets>=2.19.2; extra == "dev"
|
|
45
45
|
Requires-Dist: seaborn; extra == "dev"
|
|
46
|
-
Requires-Dist: mkdocs; extra == "dev"
|
|
47
46
|
Requires-Dist: mkdocs-material; extra == "dev"
|
|
47
|
+
Requires-Dist: torchvision>=0.20; extra == "dev"
|
|
48
48
|
|
|
49
49
|
<a name="readme-top"></a>
|
|
50
50
|
|
|
@@ -56,7 +56,6 @@ Requires-Dist: mkdocs-material; extra == "dev"
|
|
|
56
56
|
<th style="padding: 10px;" colspan="2">Stable</th>
|
|
57
57
|
<th style="padding: 10px;" colspan="2">Nightly</th>
|
|
58
58
|
<th style="padding: 10px;">Discord</th>
|
|
59
|
-
<th style="padding: 10px;">Build</th>
|
|
60
59
|
</tr>
|
|
61
60
|
<tr>
|
|
62
61
|
<td style="padding: 10px;">
|
|
@@ -80,27 +79,10 @@ Requires-Dist: mkdocs-material; extra == "dev"
|
|
|
80
79
|
</a>
|
|
81
80
|
</td>
|
|
82
81
|
<td style="padding: 10px;">
|
|
83
|
-
<a href="https://discord.gg/
|
|
84
|
-
<img src="https://dcbadge.
|
|
82
|
+
<a href="https://discord.gg/X4MaxPgA">
|
|
83
|
+
<img src="https://dcbadge.limes.pink/api/server/https://discord.gg/X4MaxPgA?style=flat" alt="Join Our Discord">
|
|
85
84
|
</a>
|
|
86
85
|
</td>
|
|
87
|
-
<td style="padding: 10px;">
|
|
88
|
-
<div style="display: block;">
|
|
89
|
-
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml">
|
|
90
|
-
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
91
|
-
</a>
|
|
92
|
-
</div>
|
|
93
|
-
<div style="display: block;">
|
|
94
|
-
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
|
|
95
|
-
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
96
|
-
</a>
|
|
97
|
-
</div>
|
|
98
|
-
<div style="display: block;">
|
|
99
|
-
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
|
|
100
|
-
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
101
|
-
</a>
|
|
102
|
-
</div>
|
|
103
|
-
</td>
|
|
104
86
|
</tr>
|
|
105
87
|
</table>
|
|
106
88
|
|
|
@@ -113,6 +95,7 @@ Requires-Dist: mkdocs-material; extra == "dev"
|
|
|
113
95
|
<details>
|
|
114
96
|
<summary>Latest News 🔥</summary>
|
|
115
97
|
|
|
98
|
+
- [2025/12/19] We announced a Liger Kernel Discord channel at https://discord.gg/X4MaxPgA. We will be hosting the Liger Kernel x Triton China Meetup in mid-January 2026.
|
|
116
99
|
- [2025/03/06] We release a joint blog post on TorchTune × Liger - [Peak Performance, Minimized Memory: Optimizing torchtune’s performance with torch.compile & Liger Kernel](https://pytorch.org/blog/peak-performance-minimized-memory/)
|
|
117
100
|
- [2024/12/11] We release [v0.5.0](https://github.com/linkedin/Liger-Kernel/releases/tag/v0.5.0): 80% more memory efficient post training losses (DPO, ORPO, CPO, etc)!
|
|
118
101
|
- [2024/12/5] We release LinkedIn Engineering Blog - [Liger-Kernel: Empowering an open source ecosystem of Triton Kernels for Efficient LLM Training](https://www.linkedin.com/blog/engineering/open-source/liger-kernel-open-source-ecosystem-for-efficient-llm-training)
|
|
@@ -129,6 +112,10 @@ Requires-Dist: mkdocs-material; extra == "dev"
|
|
|
129
112
|
|
|
130
113
|
We've also added optimized Post-Training kernels that deliver **up to 80% memory savings** for alignment and distillation tasks. We support losses like DPO, CPO, ORPO, SimPO, KTO, JSD, and many more. Check out [how we optimize the memory](https://x.com/hsu_byron/status/1866577403918917655).
|
|
131
114
|
|
|
115
|
+
You can view the documentation site for additional installation instructions, usage examples, and API references: https://linkedin.github.io/Liger-Kernel/
|
|
116
|
+
|
|
117
|
+
You can view the Liger Kernel Technical Report: https://openreview.net/forum?id=36SjAIT42G
|
|
118
|
+
|
|
132
119
|
## Supercharge Your Model with Liger Kernel
|
|
133
120
|
|
|
134
121
|

|
|
@@ -193,8 +180,8 @@ y = orpo_loss(lm_head.weight, x, target)
|
|
|
193
180
|
- `triton >= 3.0.0` Install from PyPI. (e.g. `pip install triton==3.0.0`)
|
|
194
181
|
|
|
195
182
|
```bash
|
|
196
|
-
|
|
197
|
-
|
|
183
|
+
pip install -e .[dev]
|
|
184
|
+
pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.3/
|
|
198
185
|
```
|
|
199
186
|
|
|
200
187
|
### Optional Dependencies
|
|
@@ -228,6 +215,9 @@ pip install -e .
|
|
|
228
215
|
|
|
229
216
|
# Setup Development Dependencies
|
|
230
217
|
pip install -e ".[dev]"
|
|
218
|
+
|
|
219
|
+
# NOTE -> For AMD users only
|
|
220
|
+
pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.3/
|
|
231
221
|
```
|
|
232
222
|
|
|
233
223
|
|
|
@@ -305,19 +295,30 @@ loss.backward()
|
|
|
305
295
|
|
|
306
296
|
| **Model** | **API** | **Supported Operations** |
|
|
307
297
|
|-------------|--------------------------------------------------------------|-------------------------------------------------------------------------|
|
|
298
|
+
| Llama4 (Text) & (Multimodal) | `liger_kernel.transformers.apply_liger_kernel_to_llama4` | RMSNorm, LayerNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
308
299
|
| LLaMA 2 & 3 | `liger_kernel.transformers.apply_liger_kernel_to_llama` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
309
300
|
| LLaMA 3.2-Vision | `liger_kernel.transformers.apply_liger_kernel_to_mllama` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
310
301
|
| Mistral | `liger_kernel.transformers.apply_liger_kernel_to_mistral` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
311
302
|
| Mixtral | `liger_kernel.transformers.apply_liger_kernel_to_mixtral` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
312
303
|
| Gemma1 | `liger_kernel.transformers.apply_liger_kernel_to_gemma` | RoPE, RMSNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
313
304
|
| Gemma2 | `liger_kernel.transformers.apply_liger_kernel_to_gemma2` | RoPE, RMSNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
305
|
+
| Gemma3 (Text) | `liger_kernel.transformers.apply_liger_kernel_to_gemma3_text` | RoPE, RMSNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
306
|
+
| Gemma3 (Multimodal) | `liger_kernel.transformers.apply_liger_kernel_to_gemma3` | LayerNorm, RoPE, RMSNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
314
307
|
| Paligemma, Paligemma2, & Paligemma2 Mix | `liger_kernel.transformers.apply_liger_kernel_to_paligemma` | LayerNorm, RoPE, RMSNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
315
308
|
| Qwen2, Qwen2.5, & QwQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
316
309
|
| Qwen2-VL, & QVQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_vl` | RMSNorm, LayerNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
317
310
|
| Qwen2.5-VL | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_5_vl` | RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
311
|
+
| Qwen3 | `liger_kernel.transformers.apply_liger_kernel_to_qwen3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
312
|
+
| Qwen3 MoE | `liger_kernel.transformers.apply_liger_kernel_to_qwen3_moe` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
318
313
|
| Phi3 & Phi3.5 | `liger_kernel.transformers.apply_liger_kernel_to_phi3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
319
314
|
| Granite 3.0 & 3.1 | `liger_kernel.transformers.apply_liger_kernel_to_granite` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss |
|
|
320
315
|
| OLMo2 | `liger_kernel.transformers.apply_liger_kernel_to_olmo2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
316
|
+
| Olmo3 | `liger_kernel.transformers.apply_liger_kernel_to_olmo3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
317
|
+
| GLM-4 | `liger_kernel.transformers.apply_liger_kernel_to_glm4` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
318
|
+
| GPT-OSS | `liger_kernel.transformers.apply_liger_kernel_to_gpt_oss` | RoPE, RMSNorm, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
319
|
+
| InternVL3 | `liger_kernel.transformers.apply_liger_kernel_to_internvl` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
320
|
+
| HunyuanV1 | `liger_kernel.transformers.apply_liger_kernel_to_hunyuan_v1_dense` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
321
|
+
| HunyuanV1 MoE | `liger_kernel.transformers.apply_liger_kernel_to_hunyuan_v1_moe` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
321
322
|
|
|
322
323
|
|
|
323
324
|
## Low-level APIs
|
|
@@ -335,7 +336,10 @@ loss.backward()
|
|
|
335
336
|
| SwiGLU | `liger_kernel.transformers.LigerSwiGLUMLP` |
|
|
336
337
|
| GeGLU | `liger_kernel.transformers.LigerGEGLUMLP` |
|
|
337
338
|
| CrossEntropy | `liger_kernel.transformers.LigerCrossEntropyLoss` |
|
|
338
|
-
| Fused Linear CrossEntropy
|
|
339
|
+
| Fused Linear CrossEntropy | `liger_kernel.transformers.LigerFusedLinearCrossEntropyLoss`|
|
|
340
|
+
| Multi Token Attention | `liger_kernel.transformers.LigerMultiTokenAttention` |
|
|
341
|
+
| Softmax | `liger_kernel.transformers.LigerSoftmax` |
|
|
342
|
+
| Sparsemax | `liger_kernel.transformers.LigerSparsemax` |
|
|
339
343
|
|
|
340
344
|
|
|
341
345
|
### Alignment Kernels
|
|
@@ -383,25 +387,53 @@ loss.backward()
|
|
|
383
387
|
- [Axolotl](https://axolotl.ai/): Integrating Liger Kernel into Axolotl.
|
|
384
388
|
- [Llama-Factory](https://github.com/hiyouga/LLaMA-Factory): Integrating Liger Kernel into Llama-Factory.
|
|
385
389
|
|
|
390
|
+
|
|
391
|
+
## CI status
|
|
392
|
+
|
|
393
|
+
<table style="width: 100%; text-align: center; border-collapse: collapse;">
|
|
394
|
+
<tr>
|
|
395
|
+
<th style="padding: 10px;">Build</th>
|
|
396
|
+
</tr>
|
|
397
|
+
<tr>
|
|
398
|
+
<td style="padding: 10px;">
|
|
399
|
+
<div style="display: block;">
|
|
400
|
+
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml">
|
|
401
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml/badge.svg?branch=main&event=push" alt="Build">
|
|
402
|
+
</a>
|
|
403
|
+
</div>
|
|
404
|
+
<div style="display: block;">
|
|
405
|
+
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
|
|
406
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?branch=main&event=push" alt="Build">
|
|
407
|
+
</a>
|
|
408
|
+
</div>
|
|
409
|
+
<div style="display: block;">
|
|
410
|
+
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml">
|
|
411
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?branch=main&event=push" alt="Build">
|
|
412
|
+
</a>
|
|
413
|
+
</div>
|
|
414
|
+
</td>
|
|
415
|
+
</tr>
|
|
416
|
+
</table>
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
|
|
386
420
|
## Contact
|
|
387
421
|
|
|
388
422
|
- For issues, create a GitHub ticket in this repository
|
|
389
423
|
- For open discussion, join [our discord channel on GPUMode](https://discord.com/channels/1189498204333543425/1275130785933951039)
|
|
390
|
-
- For formal collaboration, send an email to yannchen@linkedin.com and
|
|
424
|
+
- For formal collaboration, send an email to Yanning Chen (yannchen@linkedin.com) and Zhipeng Wang (zhipwang@linkedin.com)
|
|
391
425
|
|
|
392
426
|
## Cite this work
|
|
393
427
|
|
|
394
428
|
Biblatex entry:
|
|
395
429
|
```bib
|
|
396
|
-
@
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
url={https://arxiv.org/abs/2410.10989},
|
|
404
|
-
journal={arXiv preprint arXiv:2410.10989},
|
|
430
|
+
@inproceedings{
|
|
431
|
+
hsu2025ligerkernel,
|
|
432
|
+
title={Liger-Kernel: Efficient Triton Kernels for {LLM} Training},
|
|
433
|
+
author={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen and Zhipeng Wang},
|
|
434
|
+
booktitle={Championing Open-source DEvelopment in ML Workshop @ ICML25},
|
|
435
|
+
year={2025},
|
|
436
|
+
url={https://openreview.net/forum?id=36SjAIT42G}
|
|
405
437
|
}
|
|
406
438
|
```
|
|
407
439
|
|
|
@@ -413,3 +445,5 @@ Biblatex entry:
|
|
|
413
445
|
↑ Back to Top ↑
|
|
414
446
|
</a>
|
|
415
447
|
</p>
|
|
448
|
+
|
|
449
|
+
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
liger_kernel/env_report.py,sha256=uhdEC8OydxoZlb7B6YYcAaBF3crGFdIck-4cxaW4NJY,1728
|
|
3
|
+
liger_kernel/utils.py,sha256=1SXJmyVtn-aoyUkA-Acsur_hdHqtxoGqN4v4Vk820bE,3845
|
|
4
|
+
liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EBU1LpWU,2248
|
|
5
|
+
liger_kernel/chunked_loss/__init__.py,sha256=J5_jNnzZ4gZmA38W5f_4oab7xMoNk1Xy-yh3X_Xlf-s,714
|
|
6
|
+
liger_kernel/chunked_loss/cosine_similarity_loss.py,sha256=h8lPAkw8oYPUUBZ5YEG2tMMmQ7XkWnOo7r29A5vx-Eg,4759
|
|
7
|
+
liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
|
|
8
|
+
liger_kernel/chunked_loss/dpo_loss.py,sha256=I83khNs3QQjuhr8U3NIOAACkbse6DNiBV-TulPZ0lXw,9006
|
|
9
|
+
liger_kernel/chunked_loss/functional.py,sha256=-XPDbLml9dHmvoSU2VNTUrBDFehuzvuAGPikVetBMtI,1132
|
|
10
|
+
liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=Bjdxnjfg-GwYvMJ102juP06gFMlbkxYPpvv7cV_mZcg,12912
|
|
11
|
+
liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=baU19PwqO1FTVxwlB-eyJv6gOLtL7baXGzSncYQ8Ktc,14296
|
|
12
|
+
liger_kernel/chunked_loss/fused_linear_preference.py,sha256=FIH85uUXAOgYx5Ax8MjFhJHVu-2pKtY7wSegd0zSyyY,18336
|
|
13
|
+
liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=RiuK3UtRwH9T6jZ36sA8Urj-TVuOLOO2syLg_JOQapY,13437
|
|
14
|
+
liger_kernel/chunked_loss/grpo_loss.py,sha256=bmuZaNgqNbJ5pJGFDXWE-B4BGYF7xWVSN15UyCfuq_s,13079
|
|
15
|
+
liger_kernel/chunked_loss/jsd_loss.py,sha256=NJKmJ76_-kI875ZkC4hQfC4nAvPNCj4ZsNyDNsfD74k,8761
|
|
16
|
+
liger_kernel/chunked_loss/kto_loss.py,sha256=llVCe6DkcpCo57seGWoMikaQVFApx764jsmSbQyqwQY,7529
|
|
17
|
+
liger_kernel/chunked_loss/orpo_loss.py,sha256=nu9UYG16dcMw93lvHi4_hYs3Q0FK1KnlmMRj7OpYU8s,4872
|
|
18
|
+
liger_kernel/chunked_loss/simpo_loss.py,sha256=fy2w8KbhMrBv7b1jdIeH3bBFxY52bPQPZb3KwBvmurM,5385
|
|
19
|
+
liger_kernel/ops/__init__.py,sha256=F3m9qlXbgttykKEBsrMFf1WyK_0H8CKqLuDnFRR-cvc,7237
|
|
20
|
+
liger_kernel/ops/cross_entropy.py,sha256=DnXFRZ9TGN1SnEo8xGBFFPLNQaen8aLVNPJ1em-LbK4,22910
|
|
21
|
+
liger_kernel/ops/dyt.py,sha256=4XmkCCZaPPM8Tl4QHo6vSF2m68jrwsnjucrbyOJvZpM,5628
|
|
22
|
+
liger_kernel/ops/fused_add_rms_norm.py,sha256=lvwrLsKvoAQqS9KatgBkAyy0Xdecado-g0rvXYXaBak,14237
|
|
23
|
+
liger_kernel/ops/fused_linear_cross_entropy.py,sha256=1gx2qljre9PVc861iknFnNCGC-P35D2w1cc_yMDO9ow,16239
|
|
24
|
+
liger_kernel/ops/fused_linear_jsd.py,sha256=CSoprxb-YcJy-YUKiTcYkxN8sb9h2kdk_iHuncvSV5c,9683
|
|
25
|
+
liger_kernel/ops/fused_neighborhood_attention.py,sha256=vPi5xbnh6wxyZehaqo6Tuilqo2fN5SGDiONjnNmIKqs,35556
|
|
26
|
+
liger_kernel/ops/geglu.py,sha256=-ruMACDsFH1YsAak6BGvZ0ktLGIrBE6yGF0dAyR82UU,4307
|
|
27
|
+
liger_kernel/ops/group_norm.py,sha256=2usnMR0bYEiur733oLc_6TBbI0em5BZonmlGrP2_AKM,11016
|
|
28
|
+
liger_kernel/ops/grpo_loss.py,sha256=2SyOujtF9I3xiNo4wFf4s6MeiDotE_qeYfRWgj_bOBE,9573
|
|
29
|
+
liger_kernel/ops/jsd.py,sha256=onHp5T3MbvJaVz5Vup7Ww6EQp_HTaZeayTjJk6FgQMY,7042
|
|
30
|
+
liger_kernel/ops/kl_div.py,sha256=MZZb7eAPMXlydYVV4uL9aTytXFkdQdp-jmiDw9tC0pg,8652
|
|
31
|
+
liger_kernel/ops/layer_norm.py,sha256=-4UEyko9eKgBi5LNmfdEU2hTpJOWVnEy5iYjJkMvHmk,10598
|
|
32
|
+
liger_kernel/ops/llama4_rope.py,sha256=-aqdZzllklTN8b9--e-TsWY_ntGCN8-tyseT4x0bd8s,8223
|
|
33
|
+
liger_kernel/ops/multi_token_attention.py,sha256=Oz_RXDp-OSS_R_HuGmaETHdAJ7Toda_70OfE7TXMUlY,7645
|
|
34
|
+
liger_kernel/ops/poly_norm.py,sha256=5IdJEZnbbhblkL_X8UhSD4A2CooQbOAZJw8nAekWNs4,11372
|
|
35
|
+
liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
|
|
36
|
+
liger_kernel/ops/rms_norm.py,sha256=-n5F_D4SJccXdHUrFwgGG-CgubjLs-iWGItapGnm4NA,21824
|
|
37
|
+
liger_kernel/ops/rope.py,sha256=v-7JHRrv-5ImoROkpKfl30WwWI4qTa2tAl7zQeB4ml4,8956
|
|
38
|
+
liger_kernel/ops/softmax.py,sha256=tgORx6MK1IDDtZKqGarj0IPIVjqAIEUXXYPiinhRdtI,5864
|
|
39
|
+
liger_kernel/ops/sparsemax.py,sha256=AeWe1xgkHJFEKWTj2vu_0hj7LztGvjqXAps-QTpCY0U,5087
|
|
40
|
+
liger_kernel/ops/swiglu.py,sha256=D7nd4u_LInwsIRNCDdY77lqnTz8-W5dJrpEAt8zEO_A,3033
|
|
41
|
+
liger_kernel/ops/tiled_mlp.py,sha256=eyMFsFFgHch8a_6R6IYRG24_jqKg5GF_BQUoQuAG8SY,4529
|
|
42
|
+
liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
|
|
43
|
+
liger_kernel/ops/utils.py,sha256=Xu6MJ2-lbp4hSmI0JGImKguKU0KqWnFQDgQwOxSieyc,4360
|
|
44
|
+
liger_kernel/ops/backends/README.md,sha256=ZP59UUqD1WW8LwM5Y-cTpSM-Dtgdp8Wku2mE9kqAc2E,4185
|
|
45
|
+
liger_kernel/ops/backends/__init__.py,sha256=-mgef3cHfDFeL5NbXbq1TI7ngCahE9qqL3aMaHnXvis,629
|
|
46
|
+
liger_kernel/ops/backends/registry.py,sha256=yJa_Sh2FZ__iPCIU8h2nOQbnsFQh1I-_czROLtb1uQM,1637
|
|
47
|
+
liger_kernel/ops/backends/_ascend/__init__.py,sha256=6n0keOX9H-kLadBdVZlx-Ce0ZLVJvLiEfR-9-uxmYUk,221
|
|
48
|
+
liger_kernel/ops/backends/_ascend/ascend-ub-manager-design.md,sha256=FVXHSO1KY4ZFxCAE5r4hOYB2Q8ANyrJZ7WnFJ_GeQOA,19605
|
|
49
|
+
liger_kernel/ops/backends/_ascend/ub_manager.py,sha256=3Utke2Dwx9huB0Qoch1KU2CXKN3JS5DbP9_JusIbfQU,13174
|
|
50
|
+
liger_kernel/ops/backends/_ascend/ops/__init__.py,sha256=N41VgPn8D_YJpHez1-UEYTtA-JZxpERmAzN7WcDfE2U,2067
|
|
51
|
+
liger_kernel/ops/backends/_ascend/ops/geglu.py,sha256=M3YFE44UREf91PtOvY0X_GZouUxeeDCy3GmXDrvRLQk,10131
|
|
52
|
+
liger_kernel/ops/backends/_ascend/ops/qwen2vl_mrope.py,sha256=pUYcstJ4FuzDTkuhmQaO3U9gcVQoNCpzuwwUdtES5hM,11015
|
|
53
|
+
liger_kernel/ops/backends/_ascend/ops/rope.py,sha256=nOwtm6_eSnzDjl2S-jvGpwHrumAOgWfr5pNg6SL3R2k,10842
|
|
54
|
+
liger_kernel/ops/backends/_ascend/ops/swiglu.py,sha256=yrbEgIgeCZyayMYHCRNq7LntZE9cEemht39_TFPro0k,4682
|
|
55
|
+
liger_kernel/ops/backends/_ascend/ops/tvd.py,sha256=4Q_DXSuVRqummX5dwFT5zOgQpdaWViLbMPjJ3kWy2IE,7745
|
|
56
|
+
liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
|
|
57
|
+
liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
|
|
58
|
+
liger_kernel/transformers/__init__.py,sha256=h7U1Vxrg5OoqOstBmZMd-0G0LROYleYt_fS-RpvEq84,11057
|
|
59
|
+
liger_kernel/transformers/auto_model.py,sha256=RnJhK8xHamRnnswgRLG_muJE1i6T6LszjK8lC6vonhE,2410
|
|
60
|
+
liger_kernel/transformers/cross_entropy.py,sha256=08H8RxSxGX_52UzrHNnSZ_wWH-uvU8KrRiDmVrkOw14,1996
|
|
61
|
+
liger_kernel/transformers/dyt.py,sha256=Rng-MZQSprnGGWFtpmYKt7MIX26vFUYbq5ruM4MjH-U,719
|
|
62
|
+
liger_kernel/transformers/fsdp.py,sha256=CUiyjTmjkjY7pLXQv8ly9rnzgXw6529csd9pvtJNMYc,3096
|
|
63
|
+
liger_kernel/transformers/functional.py,sha256=f9sOWEfh5HZwOH5cVlcB_ts0MB_-fFFPki8PVZ5w__M,8352
|
|
64
|
+
liger_kernel/transformers/fused_add_rms_norm.py,sha256=k98sfcZhsgtdVxChciHmv0WUizzn6f-Rn72JtGgmafI,1180
|
|
65
|
+
liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=WnGuR_rjIWO0XHUyVakz-qsIRm028OKzi1vayvmPfbg,2320
|
|
66
|
+
liger_kernel/transformers/fused_linear_jsd.py,sha256=BW22DX3J6J8uZdoaU9JFUU5HnTrNYL63H9IQZzHkGu0,3982
|
|
67
|
+
liger_kernel/transformers/fused_neighborhood_attention.py,sha256=21O9DSRXgMQst9Lc3b62CsOLkYn-hjuskj9Zi3mvG7Y,7928
|
|
68
|
+
liger_kernel/transformers/geglu.py,sha256=esltAhNJZjWydvh07C6EaTdjA2aQzFPMNK92yR15SEI,1101
|
|
69
|
+
liger_kernel/transformers/group_norm.py,sha256=k7LDIG8H5CA5kiNj2uOi8D_Z6FlZtQDLyzJQxK2E-gA,2162
|
|
70
|
+
liger_kernel/transformers/grpo_loss.py,sha256=wNVz1o3q9XH17tDqaCZFEVXJhH9mQX44pWhQEwiRo_Q,6088
|
|
71
|
+
liger_kernel/transformers/jsd.py,sha256=_KlOX8YcdONU0tq0bIRDQ5VDBwtywm3Ro-FmlmI01qk,2975
|
|
72
|
+
liger_kernel/transformers/kl_div.py,sha256=94VR4uuj-2dZCTEnwFksvDi-LporrpB5HgmYtQCZnw0,402
|
|
73
|
+
liger_kernel/transformers/layer_norm.py,sha256=l4nsT_Zj4CdVZOM7F0I0Ox-lmLHyIJzqQvVaF0o0HbI,895
|
|
74
|
+
liger_kernel/transformers/llama4_rope.py,sha256=A_nxcS_KiUCyNeL2FAZX7yUhDsX7krrI9BG49OaN_nM,3627
|
|
75
|
+
liger_kernel/transformers/monkey_patch.py,sha256=hCFLKviPteLyDTUxjehiUS6k4hEx2GHDEualDhKpEYs,138949
|
|
76
|
+
liger_kernel/transformers/multi_token_attention.py,sha256=LtEjG7qy1-JK-HIPaz8zZ4P08aSZTnj5D635Pa04Onc,1730
|
|
77
|
+
liger_kernel/transformers/poly_norm.py,sha256=T3VdLQHLcCY7KzNzrc6IJRs8SzO8Yc7a0BS_2p6d7Wo,1367
|
|
78
|
+
liger_kernel/transformers/qwen2vl_mrope.py,sha256=0hOBR3j2Yd6xbT4z9BNRKEy1D0eyOUsIW6EmI_3PPNI,1033
|
|
79
|
+
liger_kernel/transformers/rms_norm.py,sha256=dD_69_GA3GUdtvdYVxTLKGeg8QZinJpS3qfeV7WvOuA,3237
|
|
80
|
+
liger_kernel/transformers/rope.py,sha256=-W9aYLa2hMOmmG5yeHcvPsOI5UTc95ylYxUddxkwmkA,2867
|
|
81
|
+
liger_kernel/transformers/softmax.py,sha256=VI5QGHYpXSiXckgovEnDGcXwitimsxKB0GX-AT4dAC4,256
|
|
82
|
+
liger_kernel/transformers/sparsemax.py,sha256=Os49bSpPX4pWymsasv_3j20m8GFaI54e03XFPkHiPE0,393
|
|
83
|
+
liger_kernel/transformers/swiglu.py,sha256=LpgikAs9hibAL7G6itygBbOlW9tZe5s4D2IGAKGpbPw,4284
|
|
84
|
+
liger_kernel/transformers/tiled_mlp.py,sha256=_Go2bN8huL4I0EHBPXNfpIRaEukl8hiQEEJIwpJST20,4498
|
|
85
|
+
liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
|
|
86
|
+
liger_kernel/transformers/tvd.py,sha256=GYjhtXgS3RTPveOTN2gyK4uBnjs6ii2vkSZRX21QpqA,446
|
|
87
|
+
liger_kernel/transformers/experimental/__init__.py,sha256=oQqk-f32JYgWEP9DJCj6ty6bbJSGrdXsFDQFwGeX6vI,127
|
|
88
|
+
liger_kernel/transformers/experimental/embedding.py,sha256=bjy9hHj--ivy6xEWdiE6qLy9uLyeS4PsBEgl_MdDrng,858
|
|
89
|
+
liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
90
|
+
liger_kernel/transformers/model/exaone4.py,sha256=T5Ef2FnkJ-i8ktRWvBB5GXFOIyJmvMPyGsDFt5awpmE,5802
|
|
91
|
+
liger_kernel/transformers/model/falcon_h1.py,sha256=heUZ4wUt2ATmtBtmv8Rcro3pQl6fV9T0pburjTTW7os,5004
|
|
92
|
+
liger_kernel/transformers/model/gemma.py,sha256=pAri4PYpknsFfkvyo8Ez2NNlqrUDW-KkExUXTGZAcH4,10621
|
|
93
|
+
liger_kernel/transformers/model/gemma2.py,sha256=KgSpXVi04c8hVFa7dqJtjzVobz6z7BNTvGc1WjoV4nk,12006
|
|
94
|
+
liger_kernel/transformers/model/gemma3.py,sha256=6WMn9n8zuylzO2E79eNeqFfD6wPW2EFDuLsFRMzh4so,14911
|
|
95
|
+
liger_kernel/transformers/model/glm4.py,sha256=bSp22iPIjsli4-c_usUOsyh1Bs2gIK8X6ynS0azseUs,5900
|
|
96
|
+
liger_kernel/transformers/model/glm4v.py,sha256=dd-BQpccDCp1SbIxcJ5rG8xcwYQK3KOv1Tgm9TGnZc4,6594
|
|
97
|
+
liger_kernel/transformers/model/glm4v_moe.py,sha256=zKhMdOOrRhlrvCSFaeVYfddL1ubpY8edEO91TN81n98,7135
|
|
98
|
+
liger_kernel/transformers/model/gpt_oss.py,sha256=8jEAQQNEXgVA-yuvEjKkBQvCvZy0E9ns-O9BPlajXXU,11197
|
|
99
|
+
liger_kernel/transformers/model/hunyuan_v1.py,sha256=MJvP9xkUFePIV0HLETJM4YPbVCEPkAE1ZI5Jxyiebh0,5731
|
|
100
|
+
liger_kernel/transformers/model/internvl.py,sha256=OOutracs9qrPHSU7FVYar08yinvGrHQVPvo39JEws6w,6473
|
|
101
|
+
liger_kernel/transformers/model/llama.py,sha256=kqZeONzwTBzudoChlKMzq1w23BtYGbxWZC1l1V__JTw,13410
|
|
102
|
+
liger_kernel/transformers/model/llama4.py,sha256=PfkynGVI0xxMs3EtyYpCgaALI6stu25OIrTIymE-pvg,4853
|
|
103
|
+
liger_kernel/transformers/model/llava.py,sha256=yoADM_BuIEummtTDiwWqjfUjXUMZD78VJzS0TRj5GJ4,15687
|
|
104
|
+
liger_kernel/transformers/model/loss_utils.py,sha256=mAV6NsE1xR2smQMlr_n9afh4ek3BhIfieZdTn1Z-9Fw,2836
|
|
105
|
+
liger_kernel/transformers/model/mistral.py,sha256=OcwOzVDMwwDbVccVPv-AaocznzWwzLT3aRaKK5SMaAg,6030
|
|
106
|
+
liger_kernel/transformers/model/mixtral.py,sha256=YcBDoTEJDgLFJ_RTo180DYGxR8D5Ad9-idumif7kCPE,12130
|
|
107
|
+
liger_kernel/transformers/model/mllama.py,sha256=vAHwCm63sn4kpAY0rDGf_N0HR7KRTBVpBYDVTPOaZTg,12079
|
|
108
|
+
liger_kernel/transformers/model/olmo2.py,sha256=-h2bUOeuPfY1MdShdRvq5_wFDHKP4PEimgIl0fL-BT4,5902
|
|
109
|
+
liger_kernel/transformers/model/olmo3.py,sha256=k2zYOlS8U_b5MwjdToB3tDRQ0bH_mWapVQqJcH8-qAo,6007
|
|
110
|
+
liger_kernel/transformers/model/output_classes.py,sha256=0BGXVR4dYQpSHLkSqpRoXuHMryrceGSlTYRu6pvd8ZY,4542
|
|
111
|
+
liger_kernel/transformers/model/paligemma.py,sha256=UAYoKkIMvvix7GG3cSdWaDxVjMp26YsvthJuE7wFf6Y,20848
|
|
112
|
+
liger_kernel/transformers/model/phi3.py,sha256=PT7Kw6yySg-7TsssWfi82eVMN3SWujCqzCqHigAdfeQ,4574
|
|
113
|
+
liger_kernel/transformers/model/qwen2.py,sha256=ojqdJpD3A9A5uCS0N_rSq8gyNYWSsHfuvx3Z3ObC7ss,10686
|
|
114
|
+
liger_kernel/transformers/model/qwen2_5_vl.py,sha256=FbIZDcg9cOr4PtBLNN8yVubN-gu2clndjSIzfi8NMos,6894
|
|
115
|
+
liger_kernel/transformers/model/qwen2_vl.py,sha256=967Ex4Scm0ehhiVxOtjwfj396nD9xkAwFwHcoURH6-o,6578
|
|
116
|
+
liger_kernel/transformers/model/qwen3.py,sha256=1fvioVmq5CRZSIuTd7uuLet-fti9ee3r8eLibvfNTcQ,5769
|
|
117
|
+
liger_kernel/transformers/model/qwen3_moe.py,sha256=yljJO4kyeM5U2Q4pXH3Mmq71ZFEC_Z73qgBx1-an-o8,6457
|
|
118
|
+
liger_kernel/transformers/model/qwen3_next.py,sha256=TayfD91GVLA1-fJwtVl6vMZgkUTYLQYURMRGBdCtnFc,6331
|
|
119
|
+
liger_kernel/transformers/model/qwen3_vl.py,sha256=sUIdJ-32IlFm_4pHv6PpLgVafqBS0QeJm_91tY67NdY,6646
|
|
120
|
+
liger_kernel/transformers/model/qwen3_vl_moe.py,sha256=CJEFcwBqItSEw9NA0mhEozlDTgIuJQ6VTjgkh5iLZ78,4856
|
|
121
|
+
liger_kernel/transformers/model/smollm3.py,sha256=1ewDY-99UAFJEfoeqfZxDcxjkqKYUSr5b7X-E_2BLLs,8126
|
|
122
|
+
liger_kernel/transformers/model/smolvlm.py,sha256=yFpPKawLVo3zXzLjM7Y_T8FyRrPxVyp-YPFMM8m3k0c,6734
|
|
123
|
+
liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
|
|
124
|
+
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
|
|
125
|
+
liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
|
|
126
|
+
liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
|
|
127
|
+
liger_kernel_nightly-0.6.4.dev20260112233432.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
128
|
+
liger_kernel_nightly-0.6.4.dev20260112233432.dist-info/METADATA,sha256=59Pbve5_NHRYrjw5M0afeMEZJaiYXZY8Gll41QiH-FQ,25660
|
|
129
|
+
liger_kernel_nightly-0.6.4.dev20260112233432.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
130
|
+
liger_kernel_nightly-0.6.4.dev20260112233432.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
131
|
+
liger_kernel_nightly-0.6.4.dev20260112233432.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
132
|
+
liger_kernel_nightly-0.6.4.dev20260112233432.dist-info/RECORD,,
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
liger_kernel/env_report.py,sha256=uhdEC8OydxoZlb7B6YYcAaBF3crGFdIck-4cxaW4NJY,1728
|
|
3
|
-
liger_kernel/utils.py,sha256=178Hn8uD-VauDT6FjqMyXLbKLod8ObIpaTtapHwfEK0,1861
|
|
4
|
-
liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EBU1LpWU,2248
|
|
5
|
-
liger_kernel/chunked_loss/__init__.py,sha256=ATu-xX5Fc49Cr6yBOGBRNTo593ZrU5ZCsIuvoIbJWw4,603
|
|
6
|
-
liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
|
|
7
|
-
liger_kernel/chunked_loss/dpo_loss.py,sha256=xZwGqS04si9zXyob95SAdalC-hajZg8fWINqiqffN8k,5855
|
|
8
|
-
liger_kernel/chunked_loss/functional.py,sha256=9G3nKm-Bi7uoZRFkL8wwGMl6juDl4bSzDvTa5GHZPzg,955
|
|
9
|
-
liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=ooR-qnZCyWJN935oHCSWLaKKKyaYERyhNczRGi1VOiw,11935
|
|
10
|
-
liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=2_UvvIksUP45RBw3c-88-jOtjGATf04vaWopcqtX4Oo,12688
|
|
11
|
-
liger_kernel/chunked_loss/fused_linear_preference.py,sha256=ojB42jYPu0c4ki96Ft-hy7Sf6fh_WikG-aWNrlZzSio,18362
|
|
12
|
-
liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=RiuK3UtRwH9T6jZ36sA8Urj-TVuOLOO2syLg_JOQapY,13437
|
|
13
|
-
liger_kernel/chunked_loss/grpo_loss.py,sha256=6Mb4ZT6MfnOr4Xo681rMR0LKkhzJhInvQp8wp2YVMK0,8913
|
|
14
|
-
liger_kernel/chunked_loss/jsd_loss.py,sha256=u2ahkuHsbhpNaKcpBCz5gCMDk9ou-P04DHji592dIBo,7067
|
|
15
|
-
liger_kernel/chunked_loss/kto_loss.py,sha256=llVCe6DkcpCo57seGWoMikaQVFApx764jsmSbQyqwQY,7529
|
|
16
|
-
liger_kernel/chunked_loss/orpo_loss.py,sha256=nu9UYG16dcMw93lvHi4_hYs3Q0FK1KnlmMRj7OpYU8s,4872
|
|
17
|
-
liger_kernel/chunked_loss/simpo_loss.py,sha256=fy2w8KbhMrBv7b1jdIeH3bBFxY52bPQPZb3KwBvmurM,5385
|
|
18
|
-
liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
-
liger_kernel/ops/cross_entropy.py,sha256=T5oSsqOS1y-Iea5o9v_BSU-_mIEXqWAT1oX_m59NcA4,18941
|
|
20
|
-
liger_kernel/ops/dyt.py,sha256=YD1-buHz9VmIX838VKzLc-lm5CeUQ4LAskGDWBUMQHA,6187
|
|
21
|
-
liger_kernel/ops/fused_linear_cross_entropy.py,sha256=1Y3Uk_TCSjqKgoG2eot1ptnWXJXXQESqGvOmqAW1gsM,10912
|
|
22
|
-
liger_kernel/ops/fused_linear_jsd.py,sha256=CSoprxb-YcJy-YUKiTcYkxN8sb9h2kdk_iHuncvSV5c,9683
|
|
23
|
-
liger_kernel/ops/geglu.py,sha256=axGvCIvlBzuluoAIrWTsp2iZM4BFKNInkPov8YVvH9E,4126
|
|
24
|
-
liger_kernel/ops/group_norm.py,sha256=qD4D4lSjSgVtO52EBNLC2iTseALRgPgqXE50U2woggk,10837
|
|
25
|
-
liger_kernel/ops/jsd.py,sha256=rkloGA7nDfVaa5nKY6-EYBw0E1p_MSsl4fr2xZGTp04,6961
|
|
26
|
-
liger_kernel/ops/kl_div.py,sha256=NkG7D6_DnPBzr-ohhYiQbRBnq_fbGmpn5UU7y0UBKQo,8420
|
|
27
|
-
liger_kernel/ops/layer_norm.py,sha256=6roQjioyg-9O2qLPV8nL4U0-5UH80tdzOMTWwjvDnn8,7961
|
|
28
|
-
liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
|
|
29
|
-
liger_kernel/ops/rms_norm.py,sha256=PWLJcdIKU5e-8BuYFHd9Cqlq6wmr6fUXKi9zQD4LetU,11727
|
|
30
|
-
liger_kernel/ops/rope.py,sha256=ofmBOkUpZZO-Q8Z5B_LOFYYLD-YT-8WnJ4vGOrDYouI,8943
|
|
31
|
-
liger_kernel/ops/swiglu.py,sha256=KmgMjaJQnbLLgZn2nEpbwHU_xpnYRweCyrLQSVvM1vA,3015
|
|
32
|
-
liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
|
|
33
|
-
liger_kernel/ops/utils.py,sha256=uoFKQqo-34N2TWQNvXMFywqGiOMMXNEVBxVojzlUAa0,3836
|
|
34
|
-
liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
|
|
35
|
-
liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
|
|
36
|
-
liger_kernel/transformers/__init__.py,sha256=t70gqygxH63iz-B0MOdZx4AEgA8MfqU1G7N6dvIneCY,2618
|
|
37
|
-
liger_kernel/transformers/auto_model.py,sha256=0qCTRZt280Bj_LcFdzo9hlaR-BWNazawXOGgoCZjgEg,1545
|
|
38
|
-
liger_kernel/transformers/cross_entropy.py,sha256=z3KTWQnFxr_IZaVjtYt0ZNEWQdDdYThN35xWkHlDGH0,1683
|
|
39
|
-
liger_kernel/transformers/dyt.py,sha256=QMqqc14pkE0WhpRZvapfnNAun-6C0C_tHExL2ZJuCUA,648
|
|
40
|
-
liger_kernel/transformers/functional.py,sha256=4h9Pdx_iINBqfv2Zod_c27qOpYXDDwbdVgatQ9_XBmI,5089
|
|
41
|
-
liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=09Rt7FZzLH42VOcIbQ4dlQd0o3Rlb4vk6fqiOQ7WTD8,1778
|
|
42
|
-
liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
|
|
43
|
-
liger_kernel/transformers/geglu.py,sha256=mrgqzIUVd6lN7fkDKLkw5YaESDxDtFgbot430WwPVOQ,1107
|
|
44
|
-
liger_kernel/transformers/group_norm.py,sha256=6qMAWOprr4SzP0YhNVNGQIBpM5aUHplUD2VuGJrMBz0,2173
|
|
45
|
-
liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
|
|
46
|
-
liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
|
|
47
|
-
liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
|
|
48
|
-
liger_kernel/transformers/monkey_patch.py,sha256=95afvIrZA9xSWLNIJspBLbz8lxv2Y5gfZke7MyqoOX8,56965
|
|
49
|
-
liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
|
|
50
|
-
liger_kernel/transformers/rms_norm.py,sha256=GqCEJuGt0YdqqlMcToE0Wp4A8YFquDa4UUSyH2uFW2A,1191
|
|
51
|
-
liger_kernel/transformers/rope.py,sha256=ZTrTORSAyfcFIKjk6XEeYmk4ROH7xXED9L4g2NFntlE,999
|
|
52
|
-
liger_kernel/transformers/swiglu.py,sha256=i9WTqcNRqReU4XJs391IPbl-I5X0wG4T72D4pqGFfJg,2422
|
|
53
|
-
liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
|
|
54
|
-
liger_kernel/transformers/tvd.py,sha256=XrRfyJIqN6HFxXk8MYyFVZM1OLz3mtSbRZvWfZ_JerQ,450
|
|
55
|
-
liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1wcRgDSyjBMv5i1a7BrDPDQw,881
|
|
56
|
-
liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
|
-
liger_kernel/transformers/model/gemma.py,sha256=7cBTljzh-8_ACBhYl6NUfj5_ux92YRlmnAU5gfDAQAI,9312
|
|
58
|
-
liger_kernel/transformers/model/gemma2.py,sha256=X0FOIhvFlTrmWI7Ws06wUkutgHW3lWtLOnnHp1NgZ3A,10403
|
|
59
|
-
liger_kernel/transformers/model/llama.py,sha256=d9rBaK8e8RSMCFHdgom9ZHuXOlnh6U_o-GkAFGRNGOY,9989
|
|
60
|
-
liger_kernel/transformers/model/llava.py,sha256=b0pEagjUbu2-eS9xegjyfl1DwIXLwZcNpff55ibaMbA,17601
|
|
61
|
-
liger_kernel/transformers/model/loss_utils.py,sha256=Z-fUrf-cUDUjUIH7Tl9OL2hT8nmtx7ES3kg8syuWKy4,1476
|
|
62
|
-
liger_kernel/transformers/model/mistral.py,sha256=o7tyl1sPWPfZwwrBLRlryHlSI8I55viuJoMI5Bh5Nww,5014
|
|
63
|
-
liger_kernel/transformers/model/mixtral.py,sha256=T0ITv2-PkR8VErVOVUizoS4EzjmARyR7GFh0tXDB_i4,11089
|
|
64
|
-
liger_kernel/transformers/model/mllama.py,sha256=RCKtwnGOMFYIbtt1zUQ15Cyv4eNpHkTWcgkmG2EEs2I,10804
|
|
65
|
-
liger_kernel/transformers/model/olmo2.py,sha256=5M8kczp4D-jvbjcV7cKATIJGF34xd-Rs-PPdKZWSIlY,4685
|
|
66
|
-
liger_kernel/transformers/model/paligemma.py,sha256=GNReT6tVZt3ON6aaa9ovg8mnu1hYocSx9OhgC7b-_28,19191
|
|
67
|
-
liger_kernel/transformers/model/phi3.py,sha256=NmU2DuU1Huwha6K7YSsJCnvQfUovTTGlsfBZhbx0UoI,9951
|
|
68
|
-
liger_kernel/transformers/model/qwen2.py,sha256=t7NotBHoebsPqNSxwaf9DXTg8jxgB5BdunSGqYOE0hQ,9240
|
|
69
|
-
liger_kernel/transformers/model/qwen2_5_vl.py,sha256=70BnHZjx6eQWTwi3zc5SMwxTeOOA4Tbdkfy6IYRcTaM,9289
|
|
70
|
-
liger_kernel/transformers/model/qwen2_vl.py,sha256=zo4O9fShNHYqSLrzLGqQYWSMtJI6UHaSY7zvMCYWyD8,9685
|
|
71
|
-
liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
|
|
72
|
-
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=pdekW7l6Qg_aqa5SYKYlSWUF8m3lkOFvFLcIMEHrz9s,8338
|
|
73
|
-
liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
|
|
74
|
-
liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
|
|
75
|
-
liger_kernel_nightly-0.5.5.dev20250402185702.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
76
|
-
liger_kernel_nightly-0.5.5.dev20250402185702.dist-info/METADATA,sha256=dxGuBAPSdYloQREOnavE1Dd5jAaDWMwI-NpEG0ku_RU,22959
|
|
77
|
-
liger_kernel_nightly-0.5.5.dev20250402185702.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
78
|
-
liger_kernel_nightly-0.5.5.dev20250402185702.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
79
|
-
liger_kernel_nightly-0.5.5.dev20250402185702.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
80
|
-
liger_kernel_nightly-0.5.5.dev20250402185702.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|