liger-kernel-nightly 0.6.2.dev20251011154427__py3-none-any.whl → 0.6.2.dev20251013144132__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- liger_kernel/transformers/monkey_patch.py +5 -2
- {liger_kernel_nightly-0.6.2.dev20251011154427.dist-info → liger_kernel_nightly-0.6.2.dev20251013144132.dist-info}/METADATA +1 -1
- {liger_kernel_nightly-0.6.2.dev20251011154427.dist-info → liger_kernel_nightly-0.6.2.dev20251013144132.dist-info}/RECORD +7 -7
- {liger_kernel_nightly-0.6.2.dev20251011154427.dist-info → liger_kernel_nightly-0.6.2.dev20251013144132.dist-info}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.2.dev20251011154427.dist-info → liger_kernel_nightly-0.6.2.dev20251013144132.dist-info}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.2.dev20251011154427.dist-info → liger_kernel_nightly-0.6.2.dev20251013144132.dist-info}/WHEEL +0 -0
- {liger_kernel_nightly-0.6.2.dev20251011154427.dist-info → liger_kernel_nightly-0.6.2.dev20251013144132.dist-info}/top_level.txt +0 -0
@@ -469,7 +469,7 @@ def apply_liger_kernel_to_llama4(
|
|
469
469
|
`cross_entropy` and `fused_linear_cross_entropy` cannot both be True.
|
470
470
|
If `fused_linear_cross_entropy` is True, the logits will not be materialized but more memory efficient.
|
471
471
|
rms_norm (bool): Whether to apply Liger's RMSNorm. Default is True.
|
472
|
-
swiglu (bool): Whether to apply Liger's SwiGLU MLP. Default is
|
472
|
+
swiglu (bool): Whether to apply Liger's SwiGLU MLP. Default is True.
|
473
473
|
model (PreTrainedModel): The model instance to apply Liger kernels to, if the model has already been
|
474
474
|
loaded. Default is None.
|
475
475
|
"""
|
@@ -522,7 +522,10 @@ def apply_liger_kernel_to_llama4(
|
|
522
522
|
_patch_rms_norm_module(text_model.norm)
|
523
523
|
for decoder_layer in text_model.layers:
|
524
524
|
if swiglu:
|
525
|
-
|
525
|
+
if decoder_layer.is_moe_layer:
|
526
|
+
_patch_swiglu_module(decoder_layer.feed_forward.shared_expert, LigerSwiGLUMLP)
|
527
|
+
else:
|
528
|
+
_patch_swiglu_module(decoder_layer.feed_forward, LigerSwiGLUMLP)
|
526
529
|
if rms_norm:
|
527
530
|
_patch_rms_norm_module(decoder_layer.input_layernorm)
|
528
531
|
_patch_rms_norm_module(decoder_layer.post_attention_layernorm)
|
@@ -58,7 +58,7 @@ liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCc
|
|
58
58
|
liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
|
59
59
|
liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
|
60
60
|
liger_kernel/transformers/llama4_rope.py,sha256=kS6PSHEwf3dS7hD7C7p8S0geugx2EMCiP0h0F7LsUoY,3639
|
61
|
-
liger_kernel/transformers/monkey_patch.py,sha256=
|
61
|
+
liger_kernel/transformers/monkey_patch.py,sha256=TUmx8aY0lonyThcATirRBdSs7uItVvnBggohjBItBuQ,106060
|
62
62
|
liger_kernel/transformers/multi_token_attention.py,sha256=K3NIY9_5TPgZ4_Rahn0xnkMXxD_fmlJHK4CWGYvGQp0,1752
|
63
63
|
liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
|
64
64
|
liger_kernel/transformers/rms_norm.py,sha256=vkekcvTeWY8vL4H6hg3t0XeY0Ew_3OFMPHuzqlxPPVw,2719
|
@@ -99,9 +99,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
|
|
99
99
|
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
|
100
100
|
liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
|
101
101
|
liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
|
102
|
-
liger_kernel_nightly-0.6.2.
|
103
|
-
liger_kernel_nightly-0.6.2.
|
104
|
-
liger_kernel_nightly-0.6.2.
|
105
|
-
liger_kernel_nightly-0.6.2.
|
106
|
-
liger_kernel_nightly-0.6.2.
|
107
|
-
liger_kernel_nightly-0.6.2.
|
102
|
+
liger_kernel_nightly-0.6.2.dev20251013144132.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
103
|
+
liger_kernel_nightly-0.6.2.dev20251013144132.dist-info/METADATA,sha256=3lZjwj_uIcS1aYE--_B3JuOh95x-txytvJPkdZGO_QA,24777
|
104
|
+
liger_kernel_nightly-0.6.2.dev20251013144132.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
105
|
+
liger_kernel_nightly-0.6.2.dev20251013144132.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
106
|
+
liger_kernel_nightly-0.6.2.dev20251013144132.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
107
|
+
liger_kernel_nightly-0.6.2.dev20251013144132.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|