liger-kernel-nightly 0.6.2.dev20251011154427__py3-none-any.whl → 0.6.2.dev20251013144132__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -469,7 +469,7 @@ def apply_liger_kernel_to_llama4(
469
469
  `cross_entropy` and `fused_linear_cross_entropy` cannot both be True.
470
470
  If `fused_linear_cross_entropy` is True, the logits will not be materialized but more memory efficient.
471
471
  rms_norm (bool): Whether to apply Liger's RMSNorm. Default is True.
472
- swiglu (bool): Whether to apply Liger's SwiGLU MLP. Default is False.
472
+ swiglu (bool): Whether to apply Liger's SwiGLU MLP. Default is True.
473
473
  model (PreTrainedModel): The model instance to apply Liger kernels to, if the model has already been
474
474
  loaded. Default is None.
475
475
  """
@@ -522,7 +522,10 @@ def apply_liger_kernel_to_llama4(
522
522
  _patch_rms_norm_module(text_model.norm)
523
523
  for decoder_layer in text_model.layers:
524
524
  if swiglu:
525
- _patch_swiglu_module(decoder_layer.feed_forward, LigerSwiGLUMLP)
525
+ if decoder_layer.is_moe_layer:
526
+ _patch_swiglu_module(decoder_layer.feed_forward.shared_expert, LigerSwiGLUMLP)
527
+ else:
528
+ _patch_swiglu_module(decoder_layer.feed_forward, LigerSwiGLUMLP)
526
529
  if rms_norm:
527
530
  _patch_rms_norm_module(decoder_layer.input_layernorm)
528
531
  _patch_rms_norm_module(decoder_layer.post_attention_layernorm)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.2.dev20251011154427
3
+ Version: 0.6.2.dev20251013144132
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -58,7 +58,7 @@ liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCc
58
58
  liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
59
59
  liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
60
60
  liger_kernel/transformers/llama4_rope.py,sha256=kS6PSHEwf3dS7hD7C7p8S0geugx2EMCiP0h0F7LsUoY,3639
61
- liger_kernel/transformers/monkey_patch.py,sha256=L5mq5mL0GC62bxthN7p4Db5l7NogFE-1JsbZsr4GGik,105877
61
+ liger_kernel/transformers/monkey_patch.py,sha256=TUmx8aY0lonyThcATirRBdSs7uItVvnBggohjBItBuQ,106060
62
62
  liger_kernel/transformers/multi_token_attention.py,sha256=K3NIY9_5TPgZ4_Rahn0xnkMXxD_fmlJHK4CWGYvGQp0,1752
63
63
  liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
64
64
  liger_kernel/transformers/rms_norm.py,sha256=vkekcvTeWY8vL4H6hg3t0XeY0Ew_3OFMPHuzqlxPPVw,2719
@@ -99,9 +99,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
99
99
  liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
100
100
  liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
101
101
  liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
102
- liger_kernel_nightly-0.6.2.dev20251011154427.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
103
- liger_kernel_nightly-0.6.2.dev20251011154427.dist-info/METADATA,sha256=3CtD4mdR4zhG-Dj4OQESjqTdQrC1_w-gVsOuzIosGW8,24777
104
- liger_kernel_nightly-0.6.2.dev20251011154427.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
105
- liger_kernel_nightly-0.6.2.dev20251011154427.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
106
- liger_kernel_nightly-0.6.2.dev20251011154427.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
107
- liger_kernel_nightly-0.6.2.dev20251011154427.dist-info/RECORD,,
102
+ liger_kernel_nightly-0.6.2.dev20251013144132.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
103
+ liger_kernel_nightly-0.6.2.dev20251013144132.dist-info/METADATA,sha256=3lZjwj_uIcS1aYE--_B3JuOh95x-txytvJPkdZGO_QA,24777
104
+ liger_kernel_nightly-0.6.2.dev20251013144132.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
105
+ liger_kernel_nightly-0.6.2.dev20251013144132.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
106
+ liger_kernel_nightly-0.6.2.dev20251013144132.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
107
+ liger_kernel_nightly-0.6.2.dev20251013144132.dist-info/RECORD,,