liger-kernel-nightly 0.6.3.dev20251101160510__py3-none-any.whl → 0.6.3.dev20251105012545__py3-none-any.whl
This diff shows the changes between two publicly released package versions as they appear in their public registry and is provided for informational purposes only.
- liger_kernel/transformers/model/qwen3_vl_moe.py +2 -4
- liger_kernel/transformers/monkey_patch.py +3 -9
- liger_kernel/transformers/rope.py +3 -7
- {liger_kernel_nightly-0.6.3.dev20251101160510.dist-info → liger_kernel_nightly-0.6.3.dev20251105012545.dist-info}/METADATA +1 -1
- {liger_kernel_nightly-0.6.3.dev20251101160510.dist-info → liger_kernel_nightly-0.6.3.dev20251105012545.dist-info}/RECORD +9 -9
- {liger_kernel_nightly-0.6.3.dev20251101160510.dist-info → liger_kernel_nightly-0.6.3.dev20251105012545.dist-info}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510.dist-info → liger_kernel_nightly-0.6.3.dev20251105012545.dist-info}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510.dist-info → liger_kernel_nightly-0.6.3.dev20251105012545.dist-info}/WHEEL +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510.dist-info → liger_kernel_nightly-0.6.3.dev20251105012545.dist-info}/top_level.txt +0 -0
liger_kernel/transformers/model/qwen3_vl_moe.py
@@ -5,13 +5,11 @@ from typing import Union
 
 import torch
 
+from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import Qwen3VLMoeCausalLMOutputWithPast
+from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import load_balancing_loss_func
 from transformers.utils import can_return_tuple
 
 from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
-from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
-    Qwen3VLMoeCausalLMOutputWithPast,
-    load_balancing_loss_func,
-)
 
 
 @can_return_tuple
liger_kernel/transformers/monkey_patch.py
@@ -6,7 +6,6 @@ from types import MethodType
 from typing import Callable
 from typing import Optional
 
-import torch
 import transformers
 
 from packaging import version
@@ -36,11 +35,9 @@ from liger_kernel.transformers.model.qwen3_vl_moe import lce_forward as qwen3_vl
 from liger_kernel.transformers.model.smollm3 import lce_forward as smollm3_lce_forward
 from liger_kernel.transformers.qwen2vl_mrope import liger_multimodal_rotary_pos_emb
 from liger_kernel.transformers.rms_norm import LigerRMSNorm
-from liger_kernel.transformers.rope import (
-    liger_rotary_pos_emb,
-    liger_rotary_pos_emb_with_cast,
-    liger_rotary_pos_emb_with_cast_and_leading_batch,
-)
+from liger_kernel.transformers.rope import liger_rotary_pos_emb
+from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast
+from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast_and_leading_batch
 from liger_kernel.transformers.swiglu import LigerBlockSparseTop2MLP
 from liger_kernel.transformers.swiglu import LigerPhi3SwiGLUMLP
 from liger_kernel.transformers.swiglu import LigerSwiGLUMLP
@@ -64,7 +61,6 @@ def _bind_method_to_module(module, method_name: str, new_method: Callable):
     module.__dict__[method_name] = new_method.__get__(module, module.__class__)
 
 
-
 def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", in_place=True, row_mode=None):
     # Check if the module is a PEFT ModulesToSaveWrapper
     # If it is, we need to patch the modules_to_save.default and original_modules
@@ -1651,7 +1647,6 @@ def apply_liger_kernel_to_qwen2_5_vl(
             _patch_rms_norm_module(decoder_layer.post_attention_layernorm)
 
 
-
 def apply_liger_kernel_to_qwen3_vl(
     rope: bool = True,
     cross_entropy: bool = False,
@@ -1688,7 +1683,6 @@ def apply_liger_kernel_to_qwen3_vl(
         modeling_qwen3_vl.apply_rotary_pos_emb = liger_rotary_pos_emb_with_cast
         modeling_qwen3_vl.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_with_cast_and_leading_batch
 
-
     if rms_norm:
         modeling_qwen3_vl.Qwen3VLTextRMSNorm = LigerRMSNorm
 
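The hunks above only reshuffle imports and blank lines around `apply_liger_kernel_to_qwen3_vl`; the patching itself is unchanged. For orientation, a minimal usage sketch: the argument names are taken from the signature and the `if rms_norm:` branch visible in the hunks, while the surrounding setup (an installed liger-kernel plus a transformers build with Qwen3-VL support) is an assumption.

```python
# Minimal sketch: enabling the kernels that apply_liger_kernel_to_qwen3_vl swaps in.
# Assumes liger-kernel and a transformers build with Qwen3-VL support are installed;
# the patch must run before the model class is instantiated.
from liger_kernel.transformers.monkey_patch import apply_liger_kernel_to_qwen3_vl

apply_liger_kernel_to_qwen3_vl(
    rope=True,           # assigns liger_rotary_pos_emb_with_cast onto modeling_qwen3_vl (hunk at -1688)
    rms_norm=True,       # replaces Qwen3VLTextRMSNorm with LigerRMSNorm
    cross_entropy=False,
)
```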
liger_kernel/transformers/rope.py
@@ -1,4 +1,5 @@
-from typing import Optional
+from typing import Optional
+from typing import Tuple
 
 import torch
 
@@ -32,7 +33,6 @@ def liger_rotary_pos_emb_with_cast(
     position_ids: Optional[torch.Tensor] = None,
     unsqueeze_dim: int = 1,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
-
     orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
 
     q32 = q.to(torch.float32)
@@ -52,8 +52,6 @@ def liger_rotary_pos_emb_with_cast_and_leading_batch(
     position_ids: Optional[torch.Tensor] = None,
     unsqueeze_dim: int = 1,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
-
-
     orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
 
     q32 = q.to(torch.float32).unsqueeze(0)
@@ -61,7 +59,5 @@ def liger_rotary_pos_emb_with_cast_and_leading_batch(
     cos32 = cos.to(torch.float32).unsqueeze(0)
     sin32 = sin.to(torch.float32).unsqueeze(0)
 
-    q_out, k_out = liger_rotary_pos_emb(
-        q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim
-    )
+    q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim)
     return q_out.to(orig_q_dtype).squeeze(0), k_out.to(orig_k_dtype).squeeze(0)
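The rope.py hunks only drop blank lines and unwrap a call; the wrappers' structure stays the same: remember the input dtypes, upcast to float32, run `liger_rotary_pos_emb` (with an extra leading batch dimension in the second variant), then cast back. A self-contained, CPU-only sketch of that cast pattern follows; the `rotate_half`-based rotation is a stand-in for the Liger Triton kernel and the shapes are illustrative assumptions, not taken from the diff.

```python
import torch
from typing import Tuple


def rotate_half(x: torch.Tensor) -> torch.Tensor:
    # Standard RoPE helper: rotate the second half of the last dim into the first.
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)


def rope_with_cast(q, k, cos, sin) -> Tuple[torch.Tensor, torch.Tensor]:
    # Mirrors the pattern in liger_rotary_pos_emb_with_cast: remember the input
    # dtypes, rotate in float32, cast the outputs back.
    orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
    q32, k32 = q.to(torch.float32), k.to(torch.float32)
    cos32, sin32 = cos.to(torch.float32), sin.to(torch.float32)
    q_out = q32 * cos32 + rotate_half(q32) * sin32
    k_out = k32 * cos32 + rotate_half(k32) * sin32
    return q_out.to(orig_q_dtype), k_out.to(orig_k_dtype)


# Illustrative shapes: (seq_len, head_dim) cos/sin broadcast over (heads, seq_len, head_dim) q/k.
q = torch.randn(8, 64, 32, dtype=torch.bfloat16)
k = torch.randn(8, 64, 32, dtype=torch.bfloat16)
cos = torch.randn(64, 32, dtype=torch.bfloat16)
sin = torch.randn(64, 32, dtype=torch.bfloat16)
q_rot, k_rot = rope_with_cast(q, k, cos, sin)
assert q_rot.dtype == torch.bfloat16 and q_rot.shape == q.shape
```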
{liger_kernel_nightly-0.6.3.dev20251101160510.dist-info → liger_kernel_nightly-0.6.3.dev20251105012545.dist-info}/RECORD
@@ -59,12 +59,12 @@ liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCc
 liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
 liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
 liger_kernel/transformers/llama4_rope.py,sha256=kS6PSHEwf3dS7hD7C7p8S0geugx2EMCiP0h0F7LsUoY,3639
-liger_kernel/transformers/monkey_patch.py,sha256=
+liger_kernel/transformers/monkey_patch.py,sha256=O_kl0l56oHinVv-bwl1LU5nKPm6nA0YBjKTYmmwgRbk,124732
 liger_kernel/transformers/multi_token_attention.py,sha256=K3NIY9_5TPgZ4_Rahn0xnkMXxD_fmlJHK4CWGYvGQp0,1752
 liger_kernel/transformers/poly_norm.py,sha256=g5tC75i3qy1_N26ZUP-jfpct7ivQAEdJfIfx8IXzeyE,1377
 liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
 liger_kernel/transformers/rms_norm.py,sha256=HwddVqrqS58jE-M2_4NkFGARtCDBhGnkKyjBN9b3FYI,3004
-liger_kernel/transformers/rope.py,sha256=
+liger_kernel/transformers/rope.py,sha256=VMlDZI6zss9mLaLcN5XCE_ktmYRwAi_Eh4TIgO6NrIQ,2361
 liger_kernel/transformers/softmax.py,sha256=yadlAgE4V2JByMwrDDa2s5SUBp8Jgd57xwnVvAWoBaI,264
 liger_kernel/transformers/sparsemax.py,sha256=0lQA0UEOs4mu8CMruZ3VLhImxQVXJWhPsAKUsYA7vj8,403
 liger_kernel/transformers/swiglu.py,sha256=LZ8YeLIdv2k46JleZMjzubGk98smt6t780kSgcVLsQk,3454
@@ -98,16 +98,16 @@ liger_kernel/transformers/model/qwen3.py,sha256=Q2aOg5erPrgVgRcqJm8sefLSDtvU1AD5
 liger_kernel/transformers/model/qwen3_moe.py,sha256=1CwTMCNFDYsjGoa_aHFBagtC5HuJTV-s0__5UvcjD3A,5686
 liger_kernel/transformers/model/qwen3_next.py,sha256=7To7azriAogxeE7oEvByKztH9154dnDiDVNHHm7PZK4,5632
 liger_kernel/transformers/model/qwen3_vl.py,sha256=YU76HJ0A9kG5CUaZM4i9Bzci4eeXcNl_VSC2tsPWA3k,6301
-liger_kernel/transformers/model/qwen3_vl_moe.py,sha256=
+liger_kernel/transformers/model/qwen3_vl_moe.py,sha256=ykNIvGBtmcTkn236lhmJHzU1IHVR1Kq1YYYlJ5ynhw4,4445
 liger_kernel/transformers/model/smollm3.py,sha256=0KWVkDtXbjsBKhJnaquV6vUUYyLtfmNwYH0sxJt-qTk,7667
 liger_kernel/transformers/model/smolvlm.py,sha256=yFpPKawLVo3zXzLjM7Y_T8FyRrPxVyp-YPFMM8m3k0c,6734
 liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
 liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
 liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
 liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
-liger_kernel_nightly-0.6.3.
-liger_kernel_nightly-0.6.3.
-liger_kernel_nightly-0.6.3.
-liger_kernel_nightly-0.6.3.
-liger_kernel_nightly-0.6.3.
-liger_kernel_nightly-0.6.3.
+liger_kernel_nightly-0.6.3.dev20251105012545.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
+liger_kernel_nightly-0.6.3.dev20251105012545.dist-info/METADATA,sha256=MKC5NuGeIkIrDXRVDM3wv-p0cyVbwya5NujVcmSz-mQ,24777
+liger_kernel_nightly-0.6.3.dev20251105012545.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
+liger_kernel_nightly-0.6.3.dev20251105012545.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+liger_kernel_nightly-0.6.3.dev20251105012545.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
+liger_kernel_nightly-0.6.3.dev20251105012545.dist-info/RECORD,,
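Each RECORD row follows the wheel spec's `path,sha256=<digest>,<size>` layout, where the digest is the urlsafe base64 of the file's SHA-256 with the `=` padding stripped. A small sketch of how those fields can be reproduced for verification; the path passed in is only an example of a file inside an unpacked wheel.

```python
import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    # Build a RECORD-style line: urlsafe-base64 SHA-256 digest (no '=' padding) plus byte size.
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode("ascii")
    return f"{path},sha256={digest},{len(data)}"


# Example (hypothetical local path into an unpacked wheel):
# print(record_entry("liger_kernel/transformers/rope.py"))
```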