liger-kernel-nightly 0.6.4.dev20251206103502__py3-none-any.whl → 0.6.4.dev20251209171241__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- liger_kernel/transformers/monkey_patch.py +5 -6
- liger_kernel/transformers/rope.py +27 -26
- {liger_kernel_nightly-0.6.4.dev20251206103502.dist-info → liger_kernel_nightly-0.6.4.dev20251209171241.dist-info}/METADATA +4 -1
- {liger_kernel_nightly-0.6.4.dev20251206103502.dist-info → liger_kernel_nightly-0.6.4.dev20251209171241.dist-info}/RECORD +8 -8
- {liger_kernel_nightly-0.6.4.dev20251206103502.dist-info → liger_kernel_nightly-0.6.4.dev20251209171241.dist-info}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502.dist-info → liger_kernel_nightly-0.6.4.dev20251209171241.dist-info}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502.dist-info → liger_kernel_nightly-0.6.4.dev20251209171241.dist-info}/WHEEL +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502.dist-info → liger_kernel_nightly-0.6.4.dev20251209171241.dist-info}/top_level.txt +0 -0
|
@@ -35,8 +35,7 @@ from liger_kernel.transformers.model.smollm3 import lce_forward as smollm3_lce_f
|
|
|
35
35
|
from liger_kernel.transformers.qwen2vl_mrope import liger_multimodal_rotary_pos_emb
|
|
36
36
|
from liger_kernel.transformers.rms_norm import LigerRMSNorm
|
|
37
37
|
from liger_kernel.transformers.rope import liger_rotary_pos_emb
|
|
38
|
-
from liger_kernel.transformers.rope import
|
|
39
|
-
from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast_and_leading_batch
|
|
38
|
+
from liger_kernel.transformers.rope import liger_rotary_pos_emb_vision
|
|
40
39
|
from liger_kernel.transformers.swiglu import LigerBlockSparseTop2MLP
|
|
41
40
|
from liger_kernel.transformers.swiglu import LigerPhi3SwiGLUMLP
|
|
42
41
|
from liger_kernel.transformers.swiglu import LigerSwiGLUMLP
|
|
@@ -1754,8 +1753,8 @@ def apply_liger_kernel_to_qwen3_vl(
|
|
|
1754
1753
|
from liger_kernel.transformers.model.qwen3_vl import lce_forward as qwen3_vl_lce_forward
|
|
1755
1754
|
|
|
1756
1755
|
if rope:
|
|
1757
|
-
modeling_qwen3_vl.apply_rotary_pos_emb =
|
|
1758
|
-
modeling_qwen3_vl.apply_rotary_pos_emb_vision =
|
|
1756
|
+
modeling_qwen3_vl.apply_rotary_pos_emb = liger_rotary_pos_emb
|
|
1757
|
+
modeling_qwen3_vl.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_vision
|
|
1759
1758
|
|
|
1760
1759
|
if rms_norm:
|
|
1761
1760
|
modeling_qwen3_vl.Qwen3VLTextRMSNorm = LigerRMSNorm
|
|
@@ -1829,8 +1828,8 @@ def apply_liger_kernel_to_qwen3_vl_moe(
|
|
|
1829
1828
|
from liger_kernel.transformers.model.qwen3_vl_moe import lce_forward as qwen3_vl_moe_lce_forward
|
|
1830
1829
|
|
|
1831
1830
|
if rope:
|
|
1832
|
-
modeling_qwen3_vl_moe.apply_rotary_pos_emb =
|
|
1833
|
-
modeling_qwen3_vl_moe.apply_rotary_pos_emb_vision =
|
|
1831
|
+
modeling_qwen3_vl_moe.apply_rotary_pos_emb = liger_rotary_pos_emb
|
|
1832
|
+
modeling_qwen3_vl_moe.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_vision
|
|
1834
1833
|
|
|
1835
1834
|
if rms_norm:
|
|
1836
1835
|
modeling_qwen3_vl_moe.Qwen3VLMoeTextRMSNorm = LigerRMSNorm
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
1
|
from typing import Tuple
|
|
3
2
|
|
|
4
3
|
import torch
|
|
@@ -25,39 +24,41 @@ def liger_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
|
|
|
25
24
|
return LigerRopeFunction.apply(q, k, cos, sin, position_ids, unsqueeze_dim)
|
|
26
25
|
|
|
27
26
|
|
|
28
|
-
def
|
|
27
|
+
def liger_rotary_pos_emb_vision(
|
|
29
28
|
q: torch.Tensor,
|
|
30
29
|
k: torch.Tensor,
|
|
31
30
|
cos: torch.Tensor,
|
|
32
31
|
sin: torch.Tensor,
|
|
33
|
-
position_ids: Optional[torch.Tensor] = None,
|
|
34
|
-
unsqueeze_dim: int = 1,
|
|
35
32
|
) -> Tuple[torch.Tensor, torch.Tensor]:
|
|
33
|
+
"""
|
|
34
|
+
Modified version of liger_rotary_pos_emb for qwen3_vl's apply_rotary_pos_emb_vision function.
|
|
35
|
+
Manually tranposed the input and output to match the expected shape for liger_rotary_pos_emb.
|
|
36
|
+
Reference: https://https://github.com/huggingface/transformers/blob/v5.0.0rc0/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py#L116
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
q (torch.Tensor): The query tensor of shape (seq_length, num_heads, head_dim),
|
|
40
|
+
with stride (num_heads * head_dim, head_dim, 1).
|
|
41
|
+
k (torch.Tensor): The query tensor of shape (seq_length, num_heads, head_dim),
|
|
42
|
+
with stride (num_heads * head_dim, head_dim, 1). Same as q.
|
|
43
|
+
cos (torch.Tensor): The cosine tensor of shape (seq_length, head_dim).
|
|
44
|
+
sin (torch.Tensor): The sine tensor of shape (seq_length, head_dim).
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
Tuple[torch.Tensor, torch.Tensor]: The query and key tensors with the same shape and stride as inputs.
|
|
48
|
+
"""
|
|
36
49
|
orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
|
|
37
50
|
|
|
38
|
-
|
|
39
|
-
|
|
51
|
+
# tranpose to (1, num_heads, seq_length, head_dim) and cast to float32 to match liger_rotary_pos_emb input shape
|
|
52
|
+
# also unsqueeze for batch dim
|
|
53
|
+
q32 = q.to(torch.float32).unsqueeze(0).transpose(1, 2)
|
|
54
|
+
k32 = k.to(torch.float32).unsqueeze(0).transpose(1, 2)
|
|
40
55
|
cos32 = cos.to(torch.float32)
|
|
41
56
|
sin32 = sin.to(torch.float32)
|
|
42
57
|
|
|
43
|
-
q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32
|
|
44
|
-
return q_out.to(orig_q_dtype), k_out.to(orig_k_dtype)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def liger_rotary_pos_emb_with_cast_and_leading_batch(
|
|
48
|
-
q: torch.Tensor,
|
|
49
|
-
k: torch.Tensor,
|
|
50
|
-
cos: torch.Tensor,
|
|
51
|
-
sin: torch.Tensor,
|
|
52
|
-
position_ids: Optional[torch.Tensor] = None,
|
|
53
|
-
unsqueeze_dim: int = 1,
|
|
54
|
-
) -> Tuple[torch.Tensor, torch.Tensor]:
|
|
55
|
-
orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
|
|
56
|
-
|
|
57
|
-
q32 = q.to(torch.float32).unsqueeze(0)
|
|
58
|
-
k32 = k.to(torch.float32).unsqueeze(0)
|
|
59
|
-
cos32 = cos.to(torch.float32).unsqueeze(0)
|
|
60
|
-
sin32 = sin.to(torch.float32).unsqueeze(0)
|
|
58
|
+
q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32)
|
|
61
59
|
|
|
62
|
-
|
|
63
|
-
|
|
60
|
+
# transpose back to (seq_length, num_heads, head_dim) and cast back to original dtype
|
|
61
|
+
# also squeeze out batch dim
|
|
62
|
+
q_out = q_out.transpose(1, 2).squeeze(0).to(orig_q_dtype)
|
|
63
|
+
k_out = k_out.transpose(1, 2).squeeze(0).to(orig_k_dtype)
|
|
64
|
+
return q_out, k_out
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: liger_kernel_nightly
|
|
3
|
-
Version: 0.6.4.
|
|
3
|
+
Version: 0.6.4.dev20251209171241
|
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
|
@@ -113,6 +113,8 @@ We've also added optimized Post-Training kernels that deliver **up to 80% memory
|
|
|
113
113
|
|
|
114
114
|
You can view the documentation site for additional installation, usage examples, and API references:https://linkedin.github.io/Liger-Kernel/
|
|
115
115
|
|
|
116
|
+
You can view the Liger Kernel Technical Report: https://openreview.net/forum?id=36SjAIT42G
|
|
117
|
+
|
|
116
118
|
## Supercharge Your Model with Liger Kernel
|
|
117
119
|
|
|
118
120
|

|
|
@@ -442,3 +444,4 @@ url={https://openreview.net/forum?id=36SjAIT42G}
|
|
|
442
444
|
↑ Back to Top ↑
|
|
443
445
|
</a>
|
|
444
446
|
</p>
|
|
447
|
+
|
|
@@ -60,12 +60,12 @@ liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCc
|
|
|
60
60
|
liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
|
|
61
61
|
liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
|
|
62
62
|
liger_kernel/transformers/llama4_rope.py,sha256=kS6PSHEwf3dS7hD7C7p8S0geugx2EMCiP0h0F7LsUoY,3639
|
|
63
|
-
liger_kernel/transformers/monkey_patch.py,sha256=
|
|
63
|
+
liger_kernel/transformers/monkey_patch.py,sha256=3MtDn6_1lljiloWHFK_GOo9fCO61QwgXh6OAu9KQdAc,135705
|
|
64
64
|
liger_kernel/transformers/multi_token_attention.py,sha256=K3NIY9_5TPgZ4_Rahn0xnkMXxD_fmlJHK4CWGYvGQp0,1752
|
|
65
65
|
liger_kernel/transformers/poly_norm.py,sha256=g5tC75i3qy1_N26ZUP-jfpct7ivQAEdJfIfx8IXzeyE,1377
|
|
66
66
|
liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
|
|
67
67
|
liger_kernel/transformers/rms_norm.py,sha256=HwddVqrqS58jE-M2_4NkFGARtCDBhGnkKyjBN9b3FYI,3004
|
|
68
|
-
liger_kernel/transformers/rope.py,sha256=
|
|
68
|
+
liger_kernel/transformers/rope.py,sha256=fmSG2h3GVtx9nqPNUOqH8TvGRKaJhsVZJMzsdVIMqPM,2872
|
|
69
69
|
liger_kernel/transformers/softmax.py,sha256=yadlAgE4V2JByMwrDDa2s5SUBp8Jgd57xwnVvAWoBaI,264
|
|
70
70
|
liger_kernel/transformers/sparsemax.py,sha256=0lQA0UEOs4mu8CMruZ3VLhImxQVXJWhPsAKUsYA7vj8,403
|
|
71
71
|
liger_kernel/transformers/swiglu.py,sha256=dRR69wDWSWfdjtnsTECyxQqWVo5QkdXdXm9SpSQ4Jvw,4291
|
|
@@ -111,9 +111,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
|
|
|
111
111
|
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
|
|
112
112
|
liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
|
|
113
113
|
liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
|
|
114
|
-
liger_kernel_nightly-0.6.4.
|
|
115
|
-
liger_kernel_nightly-0.6.4.
|
|
116
|
-
liger_kernel_nightly-0.6.4.
|
|
117
|
-
liger_kernel_nightly-0.6.4.
|
|
118
|
-
liger_kernel_nightly-0.6.4.
|
|
119
|
-
liger_kernel_nightly-0.6.4.
|
|
114
|
+
liger_kernel_nightly-0.6.4.dev20251209171241.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
115
|
+
liger_kernel_nightly-0.6.4.dev20251209171241.dist-info/METADATA,sha256=UgQQVN7vzeOeYXRTEMJucnAgorAIj6QRpv8vBj5fin8,25468
|
|
116
|
+
liger_kernel_nightly-0.6.4.dev20251209171241.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
117
|
+
liger_kernel_nightly-0.6.4.dev20251209171241.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
118
|
+
liger_kernel_nightly-0.6.4.dev20251209171241.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
119
|
+
liger_kernel_nightly-0.6.4.dev20251209171241.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|