liger-kernel-nightly 0.6.4.dev20251202054858__py3-none-any.whl → 0.6.4.dev20260107181130__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of liger-kernel-nightly might be problematic. Click here for more details.
- liger_kernel/chunked_loss/cosine_similarity_loss.py +7 -1
- liger_kernel/chunked_loss/fused_linear_distillation.py +10 -3
- liger_kernel/chunked_loss/jsd_loss.py +21 -6
- liger_kernel/ops/__init__.py +141 -0
- liger_kernel/ops/backends/README.md +151 -0
- liger_kernel/ops/backends/__init__.py +13 -0
- liger_kernel/ops/backends/_ascend/__init__.py +5 -0
- liger_kernel/ops/backends/_ascend/ascend-ub-manager-design.md +485 -0
- liger_kernel/ops/backends/_ascend/ops/__init__.py +43 -0
- liger_kernel/ops/backends/_ascend/ops/geglu.py +244 -0
- liger_kernel/ops/backends/_ascend/ops/qwen2vl_mrope.py +285 -0
- liger_kernel/ops/backends/_ascend/ops/rope.py +290 -0
- liger_kernel/ops/backends/_ascend/ops/swiglu.py +142 -0
- liger_kernel/ops/backends/_ascend/ub_manager.py +349 -0
- liger_kernel/ops/backends/registry.py +61 -0
- liger_kernel/ops/cross_entropy.py +12 -3
- liger_kernel/ops/fused_linear_cross_entropy.py +2 -1
- liger_kernel/ops/geglu.py +3 -2
- liger_kernel/ops/rms_norm.py +126 -49
- liger_kernel/ops/utils.py +12 -0
- liger_kernel/transformers/__init__.py +3 -0
- liger_kernel/transformers/auto_model.py +21 -0
- liger_kernel/transformers/cross_entropy.py +1 -1
- liger_kernel/transformers/dyt.py +1 -1
- liger_kernel/transformers/experimental/embedding.py +1 -1
- liger_kernel/transformers/functional.py +20 -20
- liger_kernel/transformers/fused_add_rms_norm.py +1 -1
- liger_kernel/transformers/fused_linear_cross_entropy.py +1 -1
- liger_kernel/transformers/fused_linear_jsd.py +1 -1
- liger_kernel/transformers/fused_neighborhood_attention.py +1 -1
- liger_kernel/transformers/geglu.py +1 -1
- liger_kernel/transformers/group_norm.py +1 -1
- liger_kernel/transformers/grpo_loss.py +1 -1
- liger_kernel/transformers/jsd.py +1 -1
- liger_kernel/transformers/kl_div.py +1 -1
- liger_kernel/transformers/layer_norm.py +1 -1
- liger_kernel/transformers/llama4_rope.py +1 -1
- liger_kernel/transformers/model/gemma3.py +1 -0
- liger_kernel/transformers/model/gpt_oss.py +211 -0
- liger_kernel/transformers/model/paligemma.py +1 -0
- liger_kernel/transformers/monkey_patch.py +118 -39
- liger_kernel/transformers/multi_token_attention.py +1 -1
- liger_kernel/transformers/poly_norm.py +1 -1
- liger_kernel/transformers/qwen2vl_mrope.py +1 -1
- liger_kernel/transformers/rms_norm.py +8 -3
- liger_kernel/transformers/rope.py +28 -27
- liger_kernel/transformers/softmax.py +1 -1
- liger_kernel/transformers/sparsemax.py +1 -1
- liger_kernel/transformers/swiglu.py +1 -1
- liger_kernel/transformers/tiled_mlp.py +3 -3
- liger_kernel/transformers/tvd.py +1 -1
- liger_kernel/utils.py +27 -0
- {liger_kernel_nightly-0.6.4.dev20251202054858.dist-info → liger_kernel_nightly-0.6.4.dev20260107181130.dist-info}/METADATA +9 -3
- {liger_kernel_nightly-0.6.4.dev20251202054858.dist-info → liger_kernel_nightly-0.6.4.dev20260107181130.dist-info}/RECORD +58 -46
- {liger_kernel_nightly-0.6.4.dev20251202054858.dist-info → liger_kernel_nightly-0.6.4.dev20260107181130.dist-info}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.4.dev20251202054858.dist-info → liger_kernel_nightly-0.6.4.dev20260107181130.dist-info}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.4.dev20251202054858.dist-info → liger_kernel_nightly-0.6.4.dev20260107181130.dist-info}/WHEEL +0 -0
- {liger_kernel_nightly-0.6.4.dev20251202054858.dist-info → liger_kernel_nightly-0.6.4.dev20260107181130.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
1
|
from typing import Tuple
|
|
3
2
|
|
|
4
3
|
import torch
|
|
5
4
|
|
|
6
|
-
from liger_kernel.ops
|
|
5
|
+
from liger_kernel.ops import LigerRopeFunction
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
def liger_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
|
|
@@ -25,39 +24,41 @@ def liger_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
|
|
|
25
24
|
return LigerRopeFunction.apply(q, k, cos, sin, position_ids, unsqueeze_dim)
|
|
26
25
|
|
|
27
26
|
|
|
28
|
-
def
|
|
27
|
+
def liger_rotary_pos_emb_vision(
|
|
29
28
|
q: torch.Tensor,
|
|
30
29
|
k: torch.Tensor,
|
|
31
30
|
cos: torch.Tensor,
|
|
32
31
|
sin: torch.Tensor,
|
|
33
|
-
position_ids: Optional[torch.Tensor] = None,
|
|
34
|
-
unsqueeze_dim: int = 1,
|
|
35
32
|
) -> Tuple[torch.Tensor, torch.Tensor]:
|
|
33
|
+
"""
|
|
34
|
+
Modified version of liger_rotary_pos_emb for qwen3_vl's apply_rotary_pos_emb_vision function.
|
|
35
|
+
Manually transposed the input and output to match the expected shape for liger_rotary_pos_emb.
|
|
36
|
+
Reference: https://github.com/huggingface/transformers/blob/v5.0.0rc0/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py#L116
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
q (torch.Tensor): The query tensor of shape (seq_length, num_heads, head_dim),
|
|
40
|
+
with stride (num_heads * head_dim, head_dim, 1).
|
|
41
|
+
k (torch.Tensor): The key tensor of shape (seq_length, num_heads, head_dim),
|
|
42
|
+
with stride (num_heads * head_dim, head_dim, 1). Same as q.
|
|
43
|
+
cos (torch.Tensor): The cosine tensor of shape (seq_length, head_dim).
|
|
44
|
+
sin (torch.Tensor): The sine tensor of shape (seq_length, head_dim).
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
Tuple[torch.Tensor, torch.Tensor]: The query and key tensors with the same shape and stride as inputs.
|
|
48
|
+
"""
|
|
36
49
|
orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
|
|
37
50
|
|
|
38
|
-
|
|
39
|
-
|
|
51
|
+
# transpose to (1, num_heads, seq_length, head_dim) and cast to float32 to match liger_rotary_pos_emb input shape
|
|
52
|
+
# also unsqueeze for batch dim
|
|
53
|
+
q32 = q.to(torch.float32).unsqueeze(0).transpose(1, 2)
|
|
54
|
+
k32 = k.to(torch.float32).unsqueeze(0).transpose(1, 2)
|
|
40
55
|
cos32 = cos.to(torch.float32)
|
|
41
56
|
sin32 = sin.to(torch.float32)
|
|
42
57
|
|
|
43
|
-
q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32
|
|
44
|
-
return q_out.to(orig_q_dtype), k_out.to(orig_k_dtype)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def liger_rotary_pos_emb_with_cast_and_leading_batch(
|
|
48
|
-
q: torch.Tensor,
|
|
49
|
-
k: torch.Tensor,
|
|
50
|
-
cos: torch.Tensor,
|
|
51
|
-
sin: torch.Tensor,
|
|
52
|
-
position_ids: Optional[torch.Tensor] = None,
|
|
53
|
-
unsqueeze_dim: int = 1,
|
|
54
|
-
) -> Tuple[torch.Tensor, torch.Tensor]:
|
|
55
|
-
orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
|
|
56
|
-
|
|
57
|
-
q32 = q.to(torch.float32).unsqueeze(0)
|
|
58
|
-
k32 = k.to(torch.float32).unsqueeze(0)
|
|
59
|
-
cos32 = cos.to(torch.float32).unsqueeze(0)
|
|
60
|
-
sin32 = sin.to(torch.float32).unsqueeze(0)
|
|
58
|
+
q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32)
|
|
61
59
|
|
|
62
|
-
|
|
63
|
-
|
|
60
|
+
# transpose back to (seq_length, num_heads, head_dim) and cast back to original dtype
|
|
61
|
+
# also squeeze out batch dim
|
|
62
|
+
q_out = q_out.transpose(1, 2).squeeze(0).to(orig_q_dtype)
|
|
63
|
+
k_out = k_out.transpose(1, 2).squeeze(0).to(orig_k_dtype)
|
|
64
|
+
return q_out, k_out
|
|
@@ -2,9 +2,9 @@ from typing import Optional
|
|
|
2
2
|
|
|
3
3
|
import torch.nn as nn
|
|
4
4
|
|
|
5
|
-
from liger_kernel.ops
|
|
6
|
-
from liger_kernel.ops
|
|
7
|
-
from liger_kernel.ops
|
|
5
|
+
from liger_kernel.ops import LigerGELUMulFunction
|
|
6
|
+
from liger_kernel.ops import LigerSiLUMulFunction
|
|
7
|
+
from liger_kernel.ops import apply_tiled_mlp
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class LigerTiledGEGLUMLP(nn.Module):
|
liger_kernel/transformers/tvd.py
CHANGED
liger_kernel/utils.py
CHANGED
|
@@ -12,6 +12,33 @@ def is_peft_available():
|
|
|
12
12
|
return PEFT_AVAILABLE
|
|
13
13
|
|
|
14
14
|
|
|
15
|
+
def infer_comm_backend():
|
|
16
|
+
"""
|
|
17
|
+
Get communication backend name based on the environment.
|
|
18
|
+
"""
|
|
19
|
+
if torch.distributed.is_nccl_available():
|
|
20
|
+
# Works for Nvidia
|
|
21
|
+
# TODO: nccl may not work for AMD devices that may require use of rccl.
|
|
22
|
+
return "nccl"
|
|
23
|
+
elif is_npu_available():
|
|
24
|
+
# Use Ascend NPU if available (torch.npu)
|
|
25
|
+
# Ascend is not standard torch backend and requires extension.
|
|
26
|
+
# Assume that it is installed if NPUs are being used in
|
|
27
|
+
# multi device environment.
|
|
28
|
+
return "ascend"
|
|
29
|
+
# XPU (Intel) if available
|
|
30
|
+
elif torch.distributed.distributed_c10d.is_xccl_available():
|
|
31
|
+
return "xccl"
|
|
32
|
+
elif torch.distributed.is_mpi_available():
|
|
33
|
+
# CPU backend, first option
|
|
34
|
+
return "mpi"
|
|
35
|
+
elif torch.distributed.is_gloo_available():
|
|
36
|
+
# CPU backend, backup option
|
|
37
|
+
return "gloo"
|
|
38
|
+
else:
|
|
39
|
+
raise RuntimeError("There is no distributed backend available.")
|
|
40
|
+
|
|
41
|
+
|
|
15
42
|
def infer_device():
|
|
16
43
|
"""
|
|
17
44
|
Get current device name based on available devices
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: liger_kernel_nightly
|
|
3
|
-
Version: 0.6.4.
|
|
3
|
+
Version: 0.6.4.dev20260107181130
|
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
|
@@ -79,8 +79,8 @@ Requires-Dist: torchvision>=0.20; extra == "dev"
|
|
|
79
79
|
</a>
|
|
80
80
|
</td>
|
|
81
81
|
<td style="padding: 10px;">
|
|
82
|
-
<a href="https://discord.gg/
|
|
83
|
-
<img src="https://dcbadge.limes.pink/api/server/
|
|
82
|
+
<a href="https://discord.gg/X4MaxPgA">
|
|
83
|
+
<img src="https://dcbadge.limes.pink/api/server/https://discord.gg/X4MaxPgA?style=flat" alt="Join Our Discord">
|
|
84
84
|
</a>
|
|
85
85
|
</td>
|
|
86
86
|
</tr>
|
|
@@ -95,6 +95,7 @@ Requires-Dist: torchvision>=0.20; extra == "dev"
|
|
|
95
95
|
<details>
|
|
96
96
|
<summary>Latest News 🔥</summary>
|
|
97
97
|
|
|
98
|
+
- [2025/12/19] We announced a Liger Kernel Discord channel at https://discord.gg/X4MaxPgA. We will be hosting a Liger Kernel x Triton China Meetup in mid-January 2026.
|
|
98
99
|
- [2025/03/06] We release a joint blog post on TorchTune × Liger - [Peak Performance, Minimized Memory: Optimizing torchtune’s performance with torch.compile & Liger Kernel](https://pytorch.org/blog/peak-performance-minimized-memory/)
|
|
99
100
|
- [2024/12/11] We release [v0.5.0](https://github.com/linkedin/Liger-Kernel/releases/tag/v0.5.0): 80% more memory efficient post training losses (DPO, ORPO, CPO, etc)!
|
|
100
101
|
- [2024/12/5] We release LinkedIn Engineering Blog - [Liger-Kernel: Empowering an open source ecosystem of Triton Kernels for Efficient LLM Training](https://www.linkedin.com/blog/engineering/open-source/liger-kernel-open-source-ecosystem-for-efficient-llm-training)
|
|
@@ -113,6 +114,8 @@ We've also added optimized Post-Training kernels that deliver **up to 80% memory
|
|
|
113
114
|
|
|
114
115
|
You can view the documentation site for additional installation, usage examples, and API references: https://linkedin.github.io/Liger-Kernel/
|
|
115
116
|
|
|
117
|
+
You can view the Liger Kernel Technical Report: https://openreview.net/forum?id=36SjAIT42G
|
|
118
|
+
|
|
116
119
|
## Supercharge Your Model with Liger Kernel
|
|
117
120
|
|
|
118
121
|

|
|
@@ -312,6 +315,7 @@ loss.backward()
|
|
|
312
315
|
| OLMo2 | `liger_kernel.transformers.apply_liger_kernel_to_olmo2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
313
316
|
| Olmo3 | `liger_kernel.transformers.apply_liger_kernel_to_olmo3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
314
317
|
| GLM-4 | `liger_kernel.transformers.apply_liger_kernel_to_glm4` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
318
|
+
| GPT-OSS | `liger_kernel.transformers.apply_liger_kernel_to_gpt_oss` | RoPE, RMSNorm, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
315
319
|
| InternVL3 | `liger_kernel.transformers.apply_liger_kernel_to_internvl` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
316
320
|
| HunyuanV1 | `liger_kernel.transformers.apply_liger_kernel_to_hunyuan_v1_dense` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
317
321
|
| HunyuanV1 MoE | `liger_kernel.transformers.apply_liger_kernel_to_hunyuan_v1_moe` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
@@ -441,3 +445,5 @@ url={https://openreview.net/forum?id=36SjAIT42G}
|
|
|
441
445
|
↑ Back to Top ↑
|
|
442
446
|
</a>
|
|
443
447
|
</p>
|
|
448
|
+
|
|
449
|
+
|
|
@@ -1,29 +1,29 @@
|
|
|
1
1
|
liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
liger_kernel/env_report.py,sha256=uhdEC8OydxoZlb7B6YYcAaBF3crGFdIck-4cxaW4NJY,1728
|
|
3
|
-
liger_kernel/utils.py,sha256=
|
|
3
|
+
liger_kernel/utils.py,sha256=1SXJmyVtn-aoyUkA-Acsur_hdHqtxoGqN4v4Vk820bE,3845
|
|
4
4
|
liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EBU1LpWU,2248
|
|
5
5
|
liger_kernel/chunked_loss/__init__.py,sha256=J5_jNnzZ4gZmA38W5f_4oab7xMoNk1Xy-yh3X_Xlf-s,714
|
|
6
|
-
liger_kernel/chunked_loss/cosine_similarity_loss.py,sha256=
|
|
6
|
+
liger_kernel/chunked_loss/cosine_similarity_loss.py,sha256=h8lPAkw8oYPUUBZ5YEG2tMMmQ7XkWnOo7r29A5vx-Eg,4759
|
|
7
7
|
liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
|
|
8
8
|
liger_kernel/chunked_loss/dpo_loss.py,sha256=I83khNs3QQjuhr8U3NIOAACkbse6DNiBV-TulPZ0lXw,9006
|
|
9
9
|
liger_kernel/chunked_loss/functional.py,sha256=-XPDbLml9dHmvoSU2VNTUrBDFehuzvuAGPikVetBMtI,1132
|
|
10
|
-
liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=
|
|
10
|
+
liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=Bjdxnjfg-GwYvMJ102juP06gFMlbkxYPpvv7cV_mZcg,12912
|
|
11
11
|
liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=baU19PwqO1FTVxwlB-eyJv6gOLtL7baXGzSncYQ8Ktc,14296
|
|
12
12
|
liger_kernel/chunked_loss/fused_linear_preference.py,sha256=FIH85uUXAOgYx5Ax8MjFhJHVu-2pKtY7wSegd0zSyyY,18336
|
|
13
13
|
liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=RiuK3UtRwH9T6jZ36sA8Urj-TVuOLOO2syLg_JOQapY,13437
|
|
14
14
|
liger_kernel/chunked_loss/grpo_loss.py,sha256=bmuZaNgqNbJ5pJGFDXWE-B4BGYF7xWVSN15UyCfuq_s,13079
|
|
15
|
-
liger_kernel/chunked_loss/jsd_loss.py,sha256=
|
|
15
|
+
liger_kernel/chunked_loss/jsd_loss.py,sha256=NJKmJ76_-kI875ZkC4hQfC4nAvPNCj4ZsNyDNsfD74k,8761
|
|
16
16
|
liger_kernel/chunked_loss/kto_loss.py,sha256=llVCe6DkcpCo57seGWoMikaQVFApx764jsmSbQyqwQY,7529
|
|
17
17
|
liger_kernel/chunked_loss/orpo_loss.py,sha256=nu9UYG16dcMw93lvHi4_hYs3Q0FK1KnlmMRj7OpYU8s,4872
|
|
18
18
|
liger_kernel/chunked_loss/simpo_loss.py,sha256=fy2w8KbhMrBv7b1jdIeH3bBFxY52bPQPZb3KwBvmurM,5385
|
|
19
|
-
liger_kernel/ops/__init__.py,sha256=
|
|
20
|
-
liger_kernel/ops/cross_entropy.py,sha256=
|
|
19
|
+
liger_kernel/ops/__init__.py,sha256=F3m9qlXbgttykKEBsrMFf1WyK_0H8CKqLuDnFRR-cvc,7237
|
|
20
|
+
liger_kernel/ops/cross_entropy.py,sha256=DnXFRZ9TGN1SnEo8xGBFFPLNQaen8aLVNPJ1em-LbK4,22910
|
|
21
21
|
liger_kernel/ops/dyt.py,sha256=4XmkCCZaPPM8Tl4QHo6vSF2m68jrwsnjucrbyOJvZpM,5628
|
|
22
22
|
liger_kernel/ops/fused_add_rms_norm.py,sha256=lvwrLsKvoAQqS9KatgBkAyy0Xdecado-g0rvXYXaBak,14237
|
|
23
|
-
liger_kernel/ops/fused_linear_cross_entropy.py,sha256=
|
|
23
|
+
liger_kernel/ops/fused_linear_cross_entropy.py,sha256=1gx2qljre9PVc861iknFnNCGC-P35D2w1cc_yMDO9ow,16239
|
|
24
24
|
liger_kernel/ops/fused_linear_jsd.py,sha256=CSoprxb-YcJy-YUKiTcYkxN8sb9h2kdk_iHuncvSV5c,9683
|
|
25
25
|
liger_kernel/ops/fused_neighborhood_attention.py,sha256=vPi5xbnh6wxyZehaqo6Tuilqo2fN5SGDiONjnNmIKqs,35556
|
|
26
|
-
liger_kernel/ops/geglu.py,sha256
|
|
26
|
+
liger_kernel/ops/geglu.py,sha256=-ruMACDsFH1YsAak6BGvZ0ktLGIrBE6yGF0dAyR82UU,4307
|
|
27
27
|
liger_kernel/ops/group_norm.py,sha256=zoy-TcNkYtKGmGhTFJmnyiG_4Es4ZphpqP8jtUSI6-I,10912
|
|
28
28
|
liger_kernel/ops/grpo_loss.py,sha256=2SyOujtF9I3xiNo4wFf4s6MeiDotE_qeYfRWgj_bOBE,9573
|
|
29
29
|
liger_kernel/ops/jsd.py,sha256=onHp5T3MbvJaVz5Vup7Ww6EQp_HTaZeayTjJk6FgQMY,7042
|
|
@@ -33,55 +33,67 @@ liger_kernel/ops/llama4_rope.py,sha256=-aqdZzllklTN8b9--e-TsWY_ntGCN8-tyseT4x0bd
|
|
|
33
33
|
liger_kernel/ops/multi_token_attention.py,sha256=Oz_RXDp-OSS_R_HuGmaETHdAJ7Toda_70OfE7TXMUlY,7645
|
|
34
34
|
liger_kernel/ops/poly_norm.py,sha256=5IdJEZnbbhblkL_X8UhSD4A2CooQbOAZJw8nAekWNs4,11372
|
|
35
35
|
liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
|
|
36
|
-
liger_kernel/ops/rms_norm.py,sha256=
|
|
36
|
+
liger_kernel/ops/rms_norm.py,sha256=r97gpPmhbKz9qrBjxUEX0XP04aYu4psJeLe3KnhPZyo,21852
|
|
37
37
|
liger_kernel/ops/rope.py,sha256=v-7JHRrv-5ImoROkpKfl30WwWI4qTa2tAl7zQeB4ml4,8956
|
|
38
38
|
liger_kernel/ops/softmax.py,sha256=tgORx6MK1IDDtZKqGarj0IPIVjqAIEUXXYPiinhRdtI,5864
|
|
39
39
|
liger_kernel/ops/sparsemax.py,sha256=AeWe1xgkHJFEKWTj2vu_0hj7LztGvjqXAps-QTpCY0U,5087
|
|
40
40
|
liger_kernel/ops/swiglu.py,sha256=D7nd4u_LInwsIRNCDdY77lqnTz8-W5dJrpEAt8zEO_A,3033
|
|
41
41
|
liger_kernel/ops/tiled_mlp.py,sha256=eyMFsFFgHch8a_6R6IYRG24_jqKg5GF_BQUoQuAG8SY,4529
|
|
42
42
|
liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
|
|
43
|
-
liger_kernel/ops/utils.py,sha256=
|
|
43
|
+
liger_kernel/ops/utils.py,sha256=Xu6MJ2-lbp4hSmI0JGImKguKU0KqWnFQDgQwOxSieyc,4360
|
|
44
|
+
liger_kernel/ops/backends/README.md,sha256=ZP59UUqD1WW8LwM5Y-cTpSM-Dtgdp8Wku2mE9kqAc2E,4185
|
|
45
|
+
liger_kernel/ops/backends/__init__.py,sha256=-mgef3cHfDFeL5NbXbq1TI7ngCahE9qqL3aMaHnXvis,629
|
|
46
|
+
liger_kernel/ops/backends/registry.py,sha256=yJa_Sh2FZ__iPCIU8h2nOQbnsFQh1I-_czROLtb1uQM,1637
|
|
47
|
+
liger_kernel/ops/backends/_ascend/__init__.py,sha256=6n0keOX9H-kLadBdVZlx-Ce0ZLVJvLiEfR-9-uxmYUk,221
|
|
48
|
+
liger_kernel/ops/backends/_ascend/ascend-ub-manager-design.md,sha256=FVXHSO1KY4ZFxCAE5r4hOYB2Q8ANyrJZ7WnFJ_GeQOA,19605
|
|
49
|
+
liger_kernel/ops/backends/_ascend/ub_manager.py,sha256=3h7sncZk00veBJS37a01YPt1SLeAxJj5N3lPdv1wXAk,13174
|
|
50
|
+
liger_kernel/ops/backends/_ascend/ops/__init__.py,sha256=R1iS9R0EtmGbrN0cSkIiRtZouVl7ndiPVZJIoEALb7s,1748
|
|
51
|
+
liger_kernel/ops/backends/_ascend/ops/geglu.py,sha256=hs1Cdhw0pbgZFiK1srLuo8DCe8jtnmhjm5SS2vw8-0M,8421
|
|
52
|
+
liger_kernel/ops/backends/_ascend/ops/qwen2vl_mrope.py,sha256=pUYcstJ4FuzDTkuhmQaO3U9gcVQoNCpzuwwUdtES5hM,11015
|
|
53
|
+
liger_kernel/ops/backends/_ascend/ops/rope.py,sha256=nOwtm6_eSnzDjl2S-jvGpwHrumAOgWfr5pNg6SL3R2k,10842
|
|
54
|
+
liger_kernel/ops/backends/_ascend/ops/swiglu.py,sha256=yrbEgIgeCZyayMYHCRNq7LntZE9cEemht39_TFPro0k,4682
|
|
44
55
|
liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
|
|
45
56
|
liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
|
|
46
|
-
liger_kernel/transformers/__init__.py,sha256=
|
|
47
|
-
liger_kernel/transformers/auto_model.py,sha256=
|
|
48
|
-
liger_kernel/transformers/cross_entropy.py,sha256=
|
|
49
|
-
liger_kernel/transformers/dyt.py,sha256=
|
|
57
|
+
liger_kernel/transformers/__init__.py,sha256=4sqcDbOZ_JtS9Ag-7oyuhq5jN298GyzjJFu9J-DyyZQ,10872
|
|
58
|
+
liger_kernel/transformers/auto_model.py,sha256=RnJhK8xHamRnnswgRLG_muJE1i6T6LszjK8lC6vonhE,2410
|
|
59
|
+
liger_kernel/transformers/cross_entropy.py,sha256=08H8RxSxGX_52UzrHNnSZ_wWH-uvU8KrRiDmVrkOw14,1996
|
|
60
|
+
liger_kernel/transformers/dyt.py,sha256=Rng-MZQSprnGGWFtpmYKt7MIX26vFUYbq5ruM4MjH-U,719
|
|
50
61
|
liger_kernel/transformers/fsdp.py,sha256=CUiyjTmjkjY7pLXQv8ly9rnzgXw6529csd9pvtJNMYc,3096
|
|
51
|
-
liger_kernel/transformers/functional.py,sha256=
|
|
52
|
-
liger_kernel/transformers/fused_add_rms_norm.py,sha256=
|
|
53
|
-
liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=
|
|
54
|
-
liger_kernel/transformers/fused_linear_jsd.py,sha256=
|
|
55
|
-
liger_kernel/transformers/fused_neighborhood_attention.py,sha256=
|
|
56
|
-
liger_kernel/transformers/geglu.py,sha256=
|
|
57
|
-
liger_kernel/transformers/group_norm.py,sha256=
|
|
58
|
-
liger_kernel/transformers/grpo_loss.py,sha256=
|
|
59
|
-
liger_kernel/transformers/jsd.py,sha256=
|
|
60
|
-
liger_kernel/transformers/kl_div.py,sha256=
|
|
61
|
-
liger_kernel/transformers/layer_norm.py,sha256=
|
|
62
|
-
liger_kernel/transformers/llama4_rope.py,sha256=
|
|
63
|
-
liger_kernel/transformers/monkey_patch.py,sha256=
|
|
64
|
-
liger_kernel/transformers/multi_token_attention.py,sha256=
|
|
65
|
-
liger_kernel/transformers/poly_norm.py,sha256=
|
|
66
|
-
liger_kernel/transformers/qwen2vl_mrope.py,sha256=
|
|
67
|
-
liger_kernel/transformers/rms_norm.py,sha256=
|
|
68
|
-
liger_kernel/transformers/rope.py,sha256
|
|
69
|
-
liger_kernel/transformers/softmax.py,sha256=
|
|
70
|
-
liger_kernel/transformers/sparsemax.py,sha256=
|
|
71
|
-
liger_kernel/transformers/swiglu.py,sha256=
|
|
72
|
-
liger_kernel/transformers/tiled_mlp.py,sha256=
|
|
62
|
+
liger_kernel/transformers/functional.py,sha256=f9sOWEfh5HZwOH5cVlcB_ts0MB_-fFFPki8PVZ5w__M,8352
|
|
63
|
+
liger_kernel/transformers/fused_add_rms_norm.py,sha256=k98sfcZhsgtdVxChciHmv0WUizzn6f-Rn72JtGgmafI,1180
|
|
64
|
+
liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=WnGuR_rjIWO0XHUyVakz-qsIRm028OKzi1vayvmPfbg,2320
|
|
65
|
+
liger_kernel/transformers/fused_linear_jsd.py,sha256=BW22DX3J6J8uZdoaU9JFUU5HnTrNYL63H9IQZzHkGu0,3982
|
|
66
|
+
liger_kernel/transformers/fused_neighborhood_attention.py,sha256=21O9DSRXgMQst9Lc3b62CsOLkYn-hjuskj9Zi3mvG7Y,7928
|
|
67
|
+
liger_kernel/transformers/geglu.py,sha256=esltAhNJZjWydvh07C6EaTdjA2aQzFPMNK92yR15SEI,1101
|
|
68
|
+
liger_kernel/transformers/group_norm.py,sha256=k7LDIG8H5CA5kiNj2uOi8D_Z6FlZtQDLyzJQxK2E-gA,2162
|
|
69
|
+
liger_kernel/transformers/grpo_loss.py,sha256=wNVz1o3q9XH17tDqaCZFEVXJhH9mQX44pWhQEwiRo_Q,6088
|
|
70
|
+
liger_kernel/transformers/jsd.py,sha256=_KlOX8YcdONU0tq0bIRDQ5VDBwtywm3Ro-FmlmI01qk,2975
|
|
71
|
+
liger_kernel/transformers/kl_div.py,sha256=94VR4uuj-2dZCTEnwFksvDi-LporrpB5HgmYtQCZnw0,402
|
|
72
|
+
liger_kernel/transformers/layer_norm.py,sha256=l4nsT_Zj4CdVZOM7F0I0Ox-lmLHyIJzqQvVaF0o0HbI,895
|
|
73
|
+
liger_kernel/transformers/llama4_rope.py,sha256=A_nxcS_KiUCyNeL2FAZX7yUhDsX7krrI9BG49OaN_nM,3627
|
|
74
|
+
liger_kernel/transformers/monkey_patch.py,sha256=ESFIi_7hQMcnUtRLjAMJ9kbzSbwToDhpOfFa6aQ-SrY,135534
|
|
75
|
+
liger_kernel/transformers/multi_token_attention.py,sha256=LtEjG7qy1-JK-HIPaz8zZ4P08aSZTnj5D635Pa04Onc,1730
|
|
76
|
+
liger_kernel/transformers/poly_norm.py,sha256=T3VdLQHLcCY7KzNzrc6IJRs8SzO8Yc7a0BS_2p6d7Wo,1367
|
|
77
|
+
liger_kernel/transformers/qwen2vl_mrope.py,sha256=0hOBR3j2Yd6xbT4z9BNRKEy1D0eyOUsIW6EmI_3PPNI,1033
|
|
78
|
+
liger_kernel/transformers/rms_norm.py,sha256=dD_69_GA3GUdtvdYVxTLKGeg8QZinJpS3qfeV7WvOuA,3237
|
|
79
|
+
liger_kernel/transformers/rope.py,sha256=-W9aYLa2hMOmmG5yeHcvPsOI5UTc95ylYxUddxkwmkA,2867
|
|
80
|
+
liger_kernel/transformers/softmax.py,sha256=VI5QGHYpXSiXckgovEnDGcXwitimsxKB0GX-AT4dAC4,256
|
|
81
|
+
liger_kernel/transformers/sparsemax.py,sha256=Os49bSpPX4pWymsasv_3j20m8GFaI54e03XFPkHiPE0,393
|
|
82
|
+
liger_kernel/transformers/swiglu.py,sha256=LpgikAs9hibAL7G6itygBbOlW9tZe5s4D2IGAKGpbPw,4284
|
|
83
|
+
liger_kernel/transformers/tiled_mlp.py,sha256=gPsz7b0kxpk3mre7o1uGBt-XdNvMUN7IIqnUYIur-T0,4628
|
|
73
84
|
liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
|
|
74
|
-
liger_kernel/transformers/tvd.py,sha256=
|
|
85
|
+
liger_kernel/transformers/tvd.py,sha256=GYjhtXgS3RTPveOTN2gyK4uBnjs6ii2vkSZRX21QpqA,446
|
|
75
86
|
liger_kernel/transformers/experimental/__init__.py,sha256=oQqk-f32JYgWEP9DJCj6ty6bbJSGrdXsFDQFwGeX6vI,127
|
|
76
|
-
liger_kernel/transformers/experimental/embedding.py,sha256=
|
|
87
|
+
liger_kernel/transformers/experimental/embedding.py,sha256=bjy9hHj--ivy6xEWdiE6qLy9uLyeS4PsBEgl_MdDrng,858
|
|
77
88
|
liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
78
89
|
liger_kernel/transformers/model/falcon_h1.py,sha256=heUZ4wUt2ATmtBtmv8Rcro3pQl6fV9T0pburjTTW7os,5004
|
|
79
90
|
liger_kernel/transformers/model/gemma.py,sha256=pAri4PYpknsFfkvyo8Ez2NNlqrUDW-KkExUXTGZAcH4,10621
|
|
80
91
|
liger_kernel/transformers/model/gemma2.py,sha256=qa9Ok42vFojVGNmASTH3Ek566Vu507kjd--ZpZDKX9M,12024
|
|
81
|
-
liger_kernel/transformers/model/gemma3.py,sha256=
|
|
92
|
+
liger_kernel/transformers/model/gemma3.py,sha256=ZUrFCc-pfF8jYHV0HsptBr98hx6p2q9ea0kSzVAoFPo,14966
|
|
82
93
|
liger_kernel/transformers/model/glm4.py,sha256=bSp22iPIjsli4-c_usUOsyh1Bs2gIK8X6ynS0azseUs,5900
|
|
83
94
|
liger_kernel/transformers/model/glm4v.py,sha256=dd-BQpccDCp1SbIxcJ5rG8xcwYQK3KOv1Tgm9TGnZc4,6594
|
|
84
95
|
liger_kernel/transformers/model/glm4v_moe.py,sha256=zKhMdOOrRhlrvCSFaeVYfddL1ubpY8edEO91TN81n98,7135
|
|
96
|
+
liger_kernel/transformers/model/gpt_oss.py,sha256=8jEAQQNEXgVA-yuvEjKkBQvCvZy0E9ns-O9BPlajXXU,11197
|
|
85
97
|
liger_kernel/transformers/model/hunyuan_v1.py,sha256=MJvP9xkUFePIV0HLETJM4YPbVCEPkAE1ZI5Jxyiebh0,5731
|
|
86
98
|
liger_kernel/transformers/model/internvl.py,sha256=OOutracs9qrPHSU7FVYar08yinvGrHQVPvo39JEws6w,6473
|
|
87
99
|
liger_kernel/transformers/model/llama.py,sha256=kqZeONzwTBzudoChlKMzq1w23BtYGbxWZC1l1V__JTw,13410
|
|
@@ -94,7 +106,7 @@ liger_kernel/transformers/model/mllama.py,sha256=vAHwCm63sn4kpAY0rDGf_N0HR7KRTBV
|
|
|
94
106
|
liger_kernel/transformers/model/olmo2.py,sha256=-h2bUOeuPfY1MdShdRvq5_wFDHKP4PEimgIl0fL-BT4,5902
|
|
95
107
|
liger_kernel/transformers/model/olmo3.py,sha256=k2zYOlS8U_b5MwjdToB3tDRQ0bH_mWapVQqJcH8-qAo,6007
|
|
96
108
|
liger_kernel/transformers/model/output_classes.py,sha256=0BGXVR4dYQpSHLkSqpRoXuHMryrceGSlTYRu6pvd8ZY,4542
|
|
97
|
-
liger_kernel/transformers/model/paligemma.py,sha256=
|
|
109
|
+
liger_kernel/transformers/model/paligemma.py,sha256=UAYoKkIMvvix7GG3cSdWaDxVjMp26YsvthJuE7wFf6Y,20848
|
|
98
110
|
liger_kernel/transformers/model/phi3.py,sha256=PT7Kw6yySg-7TsssWfi82eVMN3SWujCqzCqHigAdfeQ,4574
|
|
99
111
|
liger_kernel/transformers/model/qwen2.py,sha256=ojqdJpD3A9A5uCS0N_rSq8gyNYWSsHfuvx3Z3ObC7ss,10686
|
|
100
112
|
liger_kernel/transformers/model/qwen2_5_vl.py,sha256=FbIZDcg9cOr4PtBLNN8yVubN-gu2clndjSIzfi8NMos,6894
|
|
@@ -110,9 +122,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
|
|
|
110
122
|
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
|
|
111
123
|
liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
|
|
112
124
|
liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
|
|
113
|
-
liger_kernel_nightly-0.6.4.
|
|
114
|
-
liger_kernel_nightly-0.6.4.
|
|
115
|
-
liger_kernel_nightly-0.6.4.
|
|
116
|
-
liger_kernel_nightly-0.6.4.
|
|
117
|
-
liger_kernel_nightly-0.6.4.
|
|
118
|
-
liger_kernel_nightly-0.6.4.
|
|
125
|
+
liger_kernel_nightly-0.6.4.dev20260107181130.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
126
|
+
liger_kernel_nightly-0.6.4.dev20260107181130.dist-info/METADATA,sha256=QpZmACb6StxhHZq45yyU_eXWsjjZ7R-ncI-CgJ9mWTU,25660
|
|
127
|
+
liger_kernel_nightly-0.6.4.dev20260107181130.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
128
|
+
liger_kernel_nightly-0.6.4.dev20260107181130.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
129
|
+
liger_kernel_nightly-0.6.4.dev20260107181130.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
130
|
+
liger_kernel_nightly-0.6.4.dev20260107181130.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|