liger-kernel-nightly 0.5.10.dev20250618073949__py3-none-any.whl → 0.5.10.dev20250618193218__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -54,7 +54,7 @@ def _bind_method_to_module(module, method_name: str, new_method: Callable):
54
54
  module.__dict__[method_name] = new_method.__get__(module, module.__class__)
55
55
 
56
56
 
57
- def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", in_place=True):
57
+ def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", in_place=True, row_mode=None):
58
58
  # Check if the module is a PEFT ModulesToSaveWrapper
59
59
  # If it is, we need to patch the modules_to_save.default and original_modules
60
60
  if PEFT_AVAILABLE and isinstance(module, peft.utils.other.ModulesToSaveWrapper):
@@ -64,12 +64,14 @@ def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", i
64
64
  getattr(module, "variance_epsilon", None) or getattr(module, "eps", None) or eps
65
65
  )
66
66
  module.modules_to_save.default.in_place = in_place
67
+ module.modules_to_save.default.row_mode = row_mode
67
68
  module.original_module.offset = offset
68
69
  module.original_module.casting_mode = casting_mode
69
70
  module.original_module.variance_epsilon = (
70
71
  getattr(module, "variance_epsilon", None) or getattr(module, "eps", None) or eps
71
72
  )
72
73
  module.original_module.in_place = in_place
74
+ module.original_module.row_mode = row_mode
73
75
  _bind_method_to_module(module.modules_to_save.default, "forward", LigerRMSNorm.forward)
74
76
  _bind_method_to_module(module.modules_to_save.default, "extra_repr", LigerRMSNorm.extra_repr)
75
77
  _bind_method_to_module(module.original_module, "forward", LigerRMSNorm.forward)
@@ -81,6 +83,7 @@ def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", i
81
83
  module.casting_mode = casting_mode
82
84
  module.variance_epsilon = getattr(module, "variance_epsilon", None) or getattr(module, "eps", None) or eps
83
85
  module.in_place = in_place
86
+ module.row_mode = row_mode
84
87
  _bind_method_to_module(module, "forward", LigerRMSNorm.forward)
85
88
  _bind_method_to_module(module, "extra_repr", LigerRMSNorm.extra_repr)
86
89
  module.__class__.__name__ = LigerRMSNorm.__name__
@@ -1208,7 +1211,8 @@ def apply_liger_kernel_to_qwen3_moe(
1208
1211
  _patch_rms_norm_module(base_model.norm)
1209
1212
  for decoder_layer in base_model.layers:
1210
1213
  if swiglu:
1211
- _patch_swiglu_module(decoder_layer.mlp, LigerQwen3MoeSwiGLUMLP)
1214
+ for mlp_expert in decoder_layer.mlp.experts:
1215
+ _patch_swiglu_module(mlp_expert, LigerQwen3MoeSwiGLUMLP)
1212
1216
  if rms_norm:
1213
1217
  _patch_rms_norm_module(decoder_layer.input_layernorm)
1214
1218
  _patch_rms_norm_module(decoder_layer.post_attention_layernorm)
@@ -41,9 +41,7 @@ class LigerRMSNorm(nn.Module):
41
41
  )
42
42
 
43
43
  def extra_repr(self):
44
- return (
45
- f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}, offset={self.offset}, in_place={self.in_place}"
46
- )
44
+ return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}, offset={self.offset}, in_place={self.in_place}, row_mode={self.row_mode}"
47
45
 
48
46
 
49
47
  class LigerRMSNormForGemma(LigerRMSNorm):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.10.dev20250618073949
3
+ Version: 0.5.10.dev20250618193218
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -53,10 +53,10 @@ liger_kernel/transformers/grpo_loss.py,sha256=uAkUNKSnUGEOqa82L9w2e6AI1kcmG8K45-
53
53
  liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
54
54
  liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
55
55
  liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
56
- liger_kernel/transformers/monkey_patch.py,sha256=IWqNiimHL0895yo0TjQ3lN_Y8fKGesxC-bF5He6zB2g,77536
56
+ liger_kernel/transformers/monkey_patch.py,sha256=FacsJGO8MUbn1I7HttRAlDGhLtD8jjRTXSAwvI0iPrg,77754
57
57
  liger_kernel/transformers/multi_token_attention.py,sha256=l9VDICK0dfmifUDW668hGscP8AHq2rYcM2oGUa3baRQ,1751
58
58
  liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
59
- liger_kernel/transformers/rms_norm.py,sha256=eErIr1n-13oVrc1VJY07lqazYelw_vlu9Az__RmXPSE,2717
59
+ liger_kernel/transformers/rms_norm.py,sha256=vkekcvTeWY8vL4H6hg3t0XeY0Ew_3OFMPHuzqlxPPVw,2719
60
60
  liger_kernel/transformers/rope.py,sha256=ZTrTORSAyfcFIKjk6XEeYmk4ROH7xXED9L4g2NFntlE,999
61
61
  liger_kernel/transformers/softmax.py,sha256=yadlAgE4V2JByMwrDDa2s5SUBp8Jgd57xwnVvAWoBaI,264
62
62
  liger_kernel/transformers/sparsemax.py,sha256=0lQA0UEOs4mu8CMruZ3VLhImxQVXJWhPsAKUsYA7vj8,403
@@ -87,9 +87,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
87
87
  liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
88
88
  liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
89
89
  liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
90
- liger_kernel_nightly-0.5.10.dev20250618073949.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
91
- liger_kernel_nightly-0.5.10.dev20250618073949.dist-info/METADATA,sha256=cjOju93tisY2Oux3OO1HE8UM-PWHlP92vkWajAgQCwU,24358
92
- liger_kernel_nightly-0.5.10.dev20250618073949.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
93
- liger_kernel_nightly-0.5.10.dev20250618073949.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
94
- liger_kernel_nightly-0.5.10.dev20250618073949.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
95
- liger_kernel_nightly-0.5.10.dev20250618073949.dist-info/RECORD,,
90
+ liger_kernel_nightly-0.5.10.dev20250618193218.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
91
+ liger_kernel_nightly-0.5.10.dev20250618193218.dist-info/METADATA,sha256=btKwX4xwU1mZofQJGTzF3RQw4MjRayIRoCXbuag9aUM,24358
92
+ liger_kernel_nightly-0.5.10.dev20250618193218.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
93
+ liger_kernel_nightly-0.5.10.dev20250618193218.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
94
+ liger_kernel_nightly-0.5.10.dev20250618193218.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
95
+ liger_kernel_nightly-0.5.10.dev20250618193218.dist-info/RECORD,,