liger-kernel-nightly 0.6.3.dev20251028065948__py3-none-any.whl → 0.6.3.dev20251028143010__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- liger_kernel/transformers/monkey_patch.py +52 -20
- {liger_kernel_nightly-0.6.3.dev20251028065948.dist-info → liger_kernel_nightly-0.6.3.dev20251028143010.dist-info}/METADATA +1 -1
- {liger_kernel_nightly-0.6.3.dev20251028065948.dist-info → liger_kernel_nightly-0.6.3.dev20251028143010.dist-info}/RECORD +7 -7
- {liger_kernel_nightly-0.6.3.dev20251028065948.dist-info → liger_kernel_nightly-0.6.3.dev20251028143010.dist-info}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.3.dev20251028065948.dist-info → liger_kernel_nightly-0.6.3.dev20251028143010.dist-info}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.3.dev20251028065948.dist-info → liger_kernel_nightly-0.6.3.dev20251028143010.dist-info}/WHEEL +0 -0
- {liger_kernel_nightly-0.6.3.dev20251028065948.dist-info → liger_kernel_nightly-0.6.3.dev20251028143010.dist-info}/top_level.txt +0 -0
|
@@ -2038,6 +2038,7 @@ def apply_liger_kernel_to_internvl(
|
|
|
2038
2038
|
cross_entropy: bool = False,
|
|
2039
2039
|
fused_linear_cross_entropy: bool = True,
|
|
2040
2040
|
rms_norm: bool = True,
|
|
2041
|
+
layer_norm: bool = True,
|
|
2041
2042
|
model: Optional[PreTrainedModel] = None,
|
|
2042
2043
|
**kwargs,
|
|
2043
2044
|
) -> None:
|
|
@@ -2048,37 +2049,60 @@ def apply_liger_kernel_to_internvl(
|
|
|
2048
2049
|
NOTE: InternVL is not available in transformers<4.52.1
|
|
2049
2050
|
|
|
2050
2051
|
Args:
|
|
2051
|
-
rope (bool): Whether to apply Liger's rotary position embedding. Default is True.
|
|
2052
2052
|
cross_entropy (bool): Whether to apply Liger's cross entropy loss. Default is False.
|
|
2053
2053
|
fused_linear_cross_entropy (bool):
|
|
2054
2054
|
Whether to apply Liger's fused linear cross entropy loss. Default is True.
|
|
2055
2055
|
`cross_entropy` and `fused_linear_cross_entropy` cannot both be True.
|
|
2056
2056
|
If `fused_linear_cross_entropy` is True, the logits will not be materialized, which is more memory efficient.
|
|
2057
2057
|
rms_norm (bool): Whether to apply Liger's RMSNorm. Default is True.
|
|
2058
|
-
|
|
2058
|
+
layer_norm (bool): Whether to apply Liger's LayerNorm. Default is True.
|
|
2059
2059
|
model (PreTrainedModel): The model instance to apply Liger kernels to, if the model has already been
|
|
2060
2060
|
loaded. Default is None.
|
|
2061
2061
|
"""
|
|
2062
2062
|
assert not (cross_entropy and fused_linear_cross_entropy), (
|
|
2063
2063
|
"cross_entropy and fused_linear_cross_entropy cannot both be True."
|
|
2064
2064
|
)
|
|
2065
|
+
import torch.nn as torch_nn
|
|
2065
2066
|
|
|
2066
2067
|
from transformers.models.internvl import modeling_internvl
|
|
2068
|
+
from transformers.models.internvl.modeling_internvl import InternVLForConditionalGeneration
|
|
2069
|
+
from transformers.models.internvl.modeling_internvl import InternVLModel
|
|
2070
|
+
from transformers.models.internvl.modeling_internvl import InternVLVisionLayer
|
|
2071
|
+
from transformers.models.internvl.modeling_internvl import InternVLVisionModel
|
|
2072
|
+
from transformers.models.internvl.modeling_internvl import InternVLVisionRMSNorm
|
|
2067
2073
|
|
|
2074
|
+
from liger_kernel.transformers.layer_norm import LigerLayerNorm
|
|
2068
2075
|
from liger_kernel.transformers.model.internvl import lce_forward as internvl_lce_forward
|
|
2076
|
+
from liger_kernel.transformers.rms_norm import LigerRMSNorm
|
|
2077
|
+
|
|
2078
|
+
if layer_norm and model is None:
|
|
2079
|
+
modeling_internvl.nn.LayerNorm = LigerLayerNorm
|
|
2069
2080
|
|
|
2070
2081
|
if cross_entropy:
|
|
2071
|
-
logger.
|
|
2072
|
-
|
|
2082
|
+
logger.info("Apply liger cross entropy")
|
|
2083
|
+
|
|
2084
|
+
from transformers.loss.loss_utils import nn
|
|
2085
|
+
|
|
2086
|
+
nn.functional.cross_entropy = liger_cross_entropy
|
|
2073
2087
|
if fused_linear_cross_entropy:
|
|
2074
2088
|
modeling_internvl.InternVLForConditionalGeneration.forward = internvl_lce_forward
|
|
2075
2089
|
if rms_norm:
|
|
2076
2090
|
modeling_internvl.InternVLVisionRMSNorm = LigerRMSNorm
|
|
2077
2091
|
|
|
2078
2092
|
if model is not None:
|
|
2079
|
-
|
|
2093
|
+
# The model instance already exists, so we need to additionally patch the
|
|
2094
|
+
# instance variables that reference already-instantiated modules
|
|
2095
|
+
if isinstance(model, (InternVLForConditionalGeneration, InternVLModel)):
|
|
2096
|
+
# NOTE: language_model and visual properties can be accessed through the conditional class.
|
|
2097
|
+
text_model = model.language_model
|
|
2098
|
+
vision_model: InternVLVisionModel = model.vision_tower
|
|
2099
|
+
else:
|
|
2100
|
+
raise TypeError(
|
|
2101
|
+
f"Unsupported internvl model type. `model` must be `InternVLForConditionalGeneration`, `InternVLModel`. Got: {type(model)}"
|
|
2102
|
+
)
|
|
2103
|
+
|
|
2104
|
+
text_model_name = model.config.text_config.model_type
|
|
2080
2105
|
text_liger_fn = MODEL_TYPE_TO_APPLY_LIGER_FN.get(text_model_name, None)
|
|
2081
|
-
vision_liger_fn = MODEL_TYPE_TO_APPLY_LIGER_FN.get(vision_model_name, None)
|
|
2082
2106
|
|
|
2083
2107
|
kwargs = {"cross_entropy": False, "fused_linear_cross_entropy": False, **kwargs} | {"rms_norm": rms_norm}
|
|
2084
2108
|
if text_liger_fn:
|
|
@@ -2091,25 +2115,33 @@ def apply_liger_kernel_to_internvl(
|
|
|
2091
2115
|
f"These parameters are not supported by {text_model_name}. Enter the remaining {list(text_kwargs.keys())} except for {list(remain_params)}\n"
|
|
2092
2116
|
f"Parameters accepted by {text_model_name}: {list(accept_params.keys())}"
|
|
2093
2117
|
)
|
|
2094
|
-
text_kwargs["model"] =
|
|
2118
|
+
text_kwargs["model"] = text_model
|
|
2095
2119
|
text_liger_fn(**text_kwargs)
|
|
2096
2120
|
elif text_model_name not in MODEL_TYPE_TO_APPLY_LIGER_FN:
|
|
2097
2121
|
logger.warning(f"{text_model_name} is not supported by Liger kernel.")
|
|
2098
2122
|
|
|
2099
|
-
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2123
|
+
# Patch vision model RMSNorm layers
|
|
2124
|
+
if rms_norm:
|
|
2125
|
+
for encoder_layer in vision_model.encoder.layer:
|
|
2126
|
+
encoder_layer: InternVLVisionLayer
|
|
2127
|
+
if isinstance(encoder_layer.attention.q_norm, InternVLVisionRMSNorm):
|
|
2128
|
+
_patch_rms_norm_module(encoder_layer.attention.q_norm)
|
|
2129
|
+
if isinstance(encoder_layer.attention.k_norm, InternVLVisionRMSNorm):
|
|
2130
|
+
_patch_rms_norm_module(encoder_layer.attention.k_norm)
|
|
2103
2131
|
|
|
2104
|
-
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
)
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2132
|
+
# Patch vision model LayerNorm layers
|
|
2133
|
+
if layer_norm:
|
|
2134
|
+
# Patch layernorm
|
|
2135
|
+
if isinstance(vision_model.layernorm, torch_nn.LayerNorm):
|
|
2136
|
+
_patch_layer_norm_module(vision_model.layernorm)
|
|
2137
|
+
|
|
2138
|
+
# Patch encoder layers
|
|
2139
|
+
for encoder_layer in vision_model.encoder.layer:
|
|
2140
|
+
encoder_layer: InternVLVisionLayer
|
|
2141
|
+
if isinstance(encoder_layer.layernorm_before, torch_nn.LayerNorm):
|
|
2142
|
+
_patch_layer_norm_module(encoder_layer.layernorm_before)
|
|
2143
|
+
if isinstance(encoder_layer.layernorm_after, torch_nn.LayerNorm):
|
|
2144
|
+
_patch_layer_norm_module(encoder_layer.layernorm_after)
|
|
2113
2145
|
|
|
2114
2146
|
|
|
2115
2147
|
def apply_liger_kernel_to_smolvlm(
|
|
@@ -59,7 +59,7 @@ liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCc
|
|
|
59
59
|
liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
|
|
60
60
|
liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
|
|
61
61
|
liger_kernel/transformers/llama4_rope.py,sha256=kS6PSHEwf3dS7hD7C7p8S0geugx2EMCiP0h0F7LsUoY,3639
|
|
62
|
-
liger_kernel/transformers/monkey_patch.py,sha256=
|
|
62
|
+
liger_kernel/transformers/monkey_patch.py,sha256=3DLFMn2VusVcR6C5YElfpHJBRoJxvho0a2JoVdGqxHA,117266
|
|
63
63
|
liger_kernel/transformers/multi_token_attention.py,sha256=K3NIY9_5TPgZ4_Rahn0xnkMXxD_fmlJHK4CWGYvGQp0,1752
|
|
64
64
|
liger_kernel/transformers/poly_norm.py,sha256=g5tC75i3qy1_N26ZUP-jfpct7ivQAEdJfIfx8IXzeyE,1377
|
|
65
65
|
liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
|
|
@@ -103,9 +103,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
|
|
|
103
103
|
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
|
|
104
104
|
liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
|
|
105
105
|
liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
|
|
106
|
-
liger_kernel_nightly-0.6.3.
|
|
107
|
-
liger_kernel_nightly-0.6.3.
|
|
108
|
-
liger_kernel_nightly-0.6.3.
|
|
109
|
-
liger_kernel_nightly-0.6.3.
|
|
110
|
-
liger_kernel_nightly-0.6.3.
|
|
111
|
-
liger_kernel_nightly-0.6.3.
|
|
106
|
+
liger_kernel_nightly-0.6.3.dev20251028143010.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
107
|
+
liger_kernel_nightly-0.6.3.dev20251028143010.dist-info/METADATA,sha256=ckNo8u8rwQ-UDuznWIg4v4k6i6eePViOYnkx9cshTd8,24777
|
|
108
|
+
liger_kernel_nightly-0.6.3.dev20251028143010.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
109
|
+
liger_kernel_nightly-0.6.3.dev20251028143010.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
110
|
+
liger_kernel_nightly-0.6.3.dev20251028143010.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
111
|
+
liger_kernel_nightly-0.6.3.dev20251028143010.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|