liger-kernel-nightly 0.6.3.dev20251101160510__py3-none-any.whl → 0.6.3.dev20251105012545__py3-none-any.whl

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -5,13 +5,11 @@ from typing import Union
5
5
 
6
6
  import torch
7
7
 
8
+ from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import Qwen3VLMoeCausalLMOutputWithPast
9
+ from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import load_balancing_loss_func
8
10
  from transformers.utils import can_return_tuple
9
11
 
10
12
  from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
11
- from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
12
- Qwen3VLMoeCausalLMOutputWithPast,
13
- load_balancing_loss_func,
14
- )
15
13
 
16
14
 
17
15
  @can_return_tuple
@@ -6,7 +6,6 @@ from types import MethodType
6
6
  from typing import Callable
7
7
  from typing import Optional
8
8
 
9
- import torch
10
9
  import transformers
11
10
 
12
11
  from packaging import version
@@ -36,11 +35,9 @@ from liger_kernel.transformers.model.qwen3_vl_moe import lce_forward as qwen3_vl
36
35
  from liger_kernel.transformers.model.smollm3 import lce_forward as smollm3_lce_forward
37
36
  from liger_kernel.transformers.qwen2vl_mrope import liger_multimodal_rotary_pos_emb
38
37
  from liger_kernel.transformers.rms_norm import LigerRMSNorm
39
- from liger_kernel.transformers.rope import (
40
- liger_rotary_pos_emb,
41
- liger_rotary_pos_emb_with_cast,
42
- liger_rotary_pos_emb_with_cast_and_leading_batch,
43
- )
38
+ from liger_kernel.transformers.rope import liger_rotary_pos_emb
39
+ from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast
40
+ from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast_and_leading_batch
44
41
  from liger_kernel.transformers.swiglu import LigerBlockSparseTop2MLP
45
42
  from liger_kernel.transformers.swiglu import LigerPhi3SwiGLUMLP
46
43
  from liger_kernel.transformers.swiglu import LigerSwiGLUMLP
@@ -64,7 +61,6 @@ def _bind_method_to_module(module, method_name: str, new_method: Callable):
64
61
  module.__dict__[method_name] = new_method.__get__(module, module.__class__)
65
62
 
66
63
 
67
-
68
64
  def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", in_place=True, row_mode=None):
69
65
  # Check if the module is a PEFT ModulesToSaveWrapper
70
66
  # If it is, we need to patch the modules_to_save.default and original_modules
@@ -1651,7 +1647,6 @@ def apply_liger_kernel_to_qwen2_5_vl(
1651
1647
  _patch_rms_norm_module(decoder_layer.post_attention_layernorm)
1652
1648
 
1653
1649
 
1654
-
1655
1650
  def apply_liger_kernel_to_qwen3_vl(
1656
1651
  rope: bool = True,
1657
1652
  cross_entropy: bool = False,
@@ -1688,7 +1683,6 @@ def apply_liger_kernel_to_qwen3_vl(
1688
1683
  modeling_qwen3_vl.apply_rotary_pos_emb = liger_rotary_pos_emb_with_cast
1689
1684
  modeling_qwen3_vl.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_with_cast_and_leading_batch
1690
1685
 
1691
-
1692
1686
  if rms_norm:
1693
1687
  modeling_qwen3_vl.Qwen3VLTextRMSNorm = LigerRMSNorm
1694
1688
 
@@ -1,4 +1,5 @@
1
- from typing import Optional, Tuple
1
+ from typing import Optional
2
+ from typing import Tuple
2
3
 
3
4
  import torch
4
5
 
@@ -32,7 +33,6 @@ def liger_rotary_pos_emb_with_cast(
32
33
  position_ids: Optional[torch.Tensor] = None,
33
34
  unsqueeze_dim: int = 1,
34
35
  ) -> Tuple[torch.Tensor, torch.Tensor]:
35
-
36
36
  orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
37
37
 
38
38
  q32 = q.to(torch.float32)
@@ -52,8 +52,6 @@ def liger_rotary_pos_emb_with_cast_and_leading_batch(
52
52
  position_ids: Optional[torch.Tensor] = None,
53
53
  unsqueeze_dim: int = 1,
54
54
  ) -> Tuple[torch.Tensor, torch.Tensor]:
55
-
56
-
57
55
  orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
58
56
 
59
57
  q32 = q.to(torch.float32).unsqueeze(0)
@@ -61,7 +59,5 @@ def liger_rotary_pos_emb_with_cast_and_leading_batch(
61
59
  cos32 = cos.to(torch.float32).unsqueeze(0)
62
60
  sin32 = sin.to(torch.float32).unsqueeze(0)
63
61
 
64
- q_out, k_out = liger_rotary_pos_emb(
65
- q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim
66
- )
62
+ q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim)
67
63
  return q_out.to(orig_q_dtype).squeeze(0), k_out.to(orig_k_dtype).squeeze(0)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.3.dev20251101160510
3
+ Version: 0.6.3.dev20251105012545
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -59,12 +59,12 @@ liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCc
59
59
  liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
60
60
  liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
61
61
  liger_kernel/transformers/llama4_rope.py,sha256=kS6PSHEwf3dS7hD7C7p8S0geugx2EMCiP0h0F7LsUoY,3639
62
- liger_kernel/transformers/monkey_patch.py,sha256=Qo5phPCiSF_w29R5AiDO382penkmzuEijv_iNenuuHc,124681
62
+ liger_kernel/transformers/monkey_patch.py,sha256=O_kl0l56oHinVv-bwl1LU5nKPm6nA0YBjKTYmmwgRbk,124732
63
63
  liger_kernel/transformers/multi_token_attention.py,sha256=K3NIY9_5TPgZ4_Rahn0xnkMXxD_fmlJHK4CWGYvGQp0,1752
64
64
  liger_kernel/transformers/poly_norm.py,sha256=g5tC75i3qy1_N26ZUP-jfpct7ivQAEdJfIfx8IXzeyE,1377
65
65
  liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
66
66
  liger_kernel/transformers/rms_norm.py,sha256=HwddVqrqS58jE-M2_4NkFGARtCDBhGnkKyjBN9b3FYI,3004
67
- liger_kernel/transformers/rope.py,sha256=SoOyYArsioIQzp6eZo6vnFumISf06Gl3O8WWkMmr-gQ,2360
67
+ liger_kernel/transformers/rope.py,sha256=VMlDZI6zss9mLaLcN5XCE_ktmYRwAi_Eh4TIgO6NrIQ,2361
68
68
  liger_kernel/transformers/softmax.py,sha256=yadlAgE4V2JByMwrDDa2s5SUBp8Jgd57xwnVvAWoBaI,264
69
69
  liger_kernel/transformers/sparsemax.py,sha256=0lQA0UEOs4mu8CMruZ3VLhImxQVXJWhPsAKUsYA7vj8,403
70
70
  liger_kernel/transformers/swiglu.py,sha256=LZ8YeLIdv2k46JleZMjzubGk98smt6t780kSgcVLsQk,3454
@@ -98,16 +98,16 @@ liger_kernel/transformers/model/qwen3.py,sha256=Q2aOg5erPrgVgRcqJm8sefLSDtvU1AD5
98
98
  liger_kernel/transformers/model/qwen3_moe.py,sha256=1CwTMCNFDYsjGoa_aHFBagtC5HuJTV-s0__5UvcjD3A,5686
99
99
  liger_kernel/transformers/model/qwen3_next.py,sha256=7To7azriAogxeE7oEvByKztH9154dnDiDVNHHm7PZK4,5632
100
100
  liger_kernel/transformers/model/qwen3_vl.py,sha256=YU76HJ0A9kG5CUaZM4i9Bzci4eeXcNl_VSC2tsPWA3k,6301
101
- liger_kernel/transformers/model/qwen3_vl_moe.py,sha256=0WuGA-pg5hzKPKc_B3d32qyzXMlkVi3_wlNu9d0KLOg,4392
101
+ liger_kernel/transformers/model/qwen3_vl_moe.py,sha256=ykNIvGBtmcTkn236lhmJHzU1IHVR1Kq1YYYlJ5ynhw4,4445
102
102
  liger_kernel/transformers/model/smollm3.py,sha256=0KWVkDtXbjsBKhJnaquV6vUUYyLtfmNwYH0sxJt-qTk,7667
103
103
  liger_kernel/transformers/model/smolvlm.py,sha256=yFpPKawLVo3zXzLjM7Y_T8FyRrPxVyp-YPFMM8m3k0c,6734
104
104
  liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
105
105
  liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
106
106
  liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
107
107
  liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
108
- liger_kernel_nightly-0.6.3.dev20251101160510.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
109
- liger_kernel_nightly-0.6.3.dev20251101160510.dist-info/METADATA,sha256=rsY01xVUY_8qxjoUXKklmwMso2nGFtFS5caQA2iDGlE,24777
110
- liger_kernel_nightly-0.6.3.dev20251101160510.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
111
- liger_kernel_nightly-0.6.3.dev20251101160510.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
112
- liger_kernel_nightly-0.6.3.dev20251101160510.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
113
- liger_kernel_nightly-0.6.3.dev20251101160510.dist-info/RECORD,,
108
+ liger_kernel_nightly-0.6.3.dev20251105012545.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
109
+ liger_kernel_nightly-0.6.3.dev20251105012545.dist-info/METADATA,sha256=MKC5NuGeIkIrDXRVDM3wv-p0cyVbwya5NujVcmSz-mQ,24777
110
+ liger_kernel_nightly-0.6.3.dev20251105012545.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
111
+ liger_kernel_nightly-0.6.3.dev20251105012545.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
112
+ liger_kernel_nightly-0.6.3.dev20251105012545.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
113
+ liger_kernel_nightly-0.6.3.dev20251105012545.dist-info/RECORD,,