liger-kernel-nightly 0.6.0.dev20250708194445__py3-none-any.whl → 0.6.0.dev20250709042125__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -537,7 +537,10 @@ def apply_liger_kernel_to_mllama(
537
537
  if isinstance(model, MllamaForConditionalGeneration):
538
538
  language_model: MllamaForCausalLM = model.language_model
539
539
  vision_model: MllamaVisionModel = model.vision_model
540
- text_model: MllamaTextModel = language_model
540
+ if isinstance(language_model, MllamaForCausalLM):
541
+ text_model: MllamaTextModel = language_model.model
542
+ else:
543
+ text_model = language_model
541
544
  elif isinstance(model, MllamaForCausalLM):
542
545
  text_model = model.model
543
546
  vision_model = None
@@ -1096,7 +1099,9 @@ def apply_liger_kernel_to_paligemma(
1096
1099
  # PaliGemma submodules are ['vision_tower', 'multi_modal_projector', 'language_model']
1097
1100
 
1098
1101
  from transformers.models.gemma.modeling_gemma import GemmaForCausalLM
1102
+ from transformers.models.gemma.modeling_gemma import GemmaModel
1099
1103
  from transformers.models.gemma2.modeling_gemma2 import Gemma2ForCausalLM
1104
+ from transformers.models.gemma2.modeling_gemma2 import Gemma2Model
1100
1105
  from transformers.models.paligemma import modeling_paligemma
1101
1106
  from transformers.models.paligemma.modeling_paligemma import PaliGemmaForConditionalGeneration
1102
1107
  from transformers.models.siglip import modeling_siglip
@@ -1155,7 +1160,7 @@ def apply_liger_kernel_to_paligemma(
1155
1160
 
1156
1161
  language_model = model.language_model
1157
1162
 
1158
- if isinstance(language_model, GemmaForCausalLM):
1163
+ if isinstance(language_model, (GemmaForCausalLM, GemmaModel)):
1159
1164
  apply_liger_kernel_to_gemma(
1160
1165
  rope=rope,
1161
1166
  cross_entropy=False,
@@ -1165,7 +1170,7 @@ def apply_liger_kernel_to_paligemma(
1165
1170
  model=language_model,
1166
1171
  )
1167
1172
 
1168
- elif isinstance(language_model, Gemma2ForCausalLM):
1173
+ elif isinstance(language_model, (Gemma2ForCausalLM, Gemma2Model)):
1169
1174
  apply_liger_kernel_to_gemma2(
1170
1175
  rope=rope,
1171
1176
  cross_entropy=False,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.0.dev20250708194445
3
+ Version: 0.6.0.dev20250709042125
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -33,7 +33,7 @@ License-File: NOTICE
33
33
  Requires-Dist: torch>=2.1.2
34
34
  Requires-Dist: triton>=2.3.1
35
35
  Provides-Extra: dev
36
- Requires-Dist: transformers>=4.44.2; extra == "dev"
36
+ Requires-Dist: transformers>=4.49.0; extra == "dev"
37
37
  Requires-Dist: matplotlib>=3.7.2; extra == "dev"
38
38
  Requires-Dist: flake8>=4.0.1.1; extra == "dev"
39
39
  Requires-Dist: black>=24.4.2; extra == "dev"
@@ -54,7 +54,7 @@ liger_kernel/transformers/grpo_loss.py,sha256=uAkUNKSnUGEOqa82L9w2e6AI1kcmG8K45-
54
54
  liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
55
55
  liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
56
56
  liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
57
- liger_kernel/transformers/monkey_patch.py,sha256=rXmaVry8hdpnH8HunfJhZmrsdlwAxjMP3x10ZYMnTy4,85554
57
+ liger_kernel/transformers/monkey_patch.py,sha256=W7KgJN-rrLZS3pRZ5debO_dSN7zddPegKjqOIP39wR0,85856
58
58
  liger_kernel/transformers/multi_token_attention.py,sha256=l9VDICK0dfmifUDW668hGscP8AHq2rYcM2oGUa3baRQ,1751
59
59
  liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
60
60
  liger_kernel/transformers/rms_norm.py,sha256=vkekcvTeWY8vL4H6hg3t0XeY0Ew_3OFMPHuzqlxPPVw,2719
@@ -89,9 +89,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
89
89
  liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
90
90
  liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
91
91
  liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
92
- liger_kernel_nightly-0.6.0.dev20250708194445.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
93
- liger_kernel_nightly-0.6.0.dev20250708194445.dist-info/METADATA,sha256=kV3blHZ_pcaJBcRKdgCg2T6EOvzFL3NwMoDcipaSdc0,24535
94
- liger_kernel_nightly-0.6.0.dev20250708194445.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
95
- liger_kernel_nightly-0.6.0.dev20250708194445.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
96
- liger_kernel_nightly-0.6.0.dev20250708194445.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
97
- liger_kernel_nightly-0.6.0.dev20250708194445.dist-info/RECORD,,
92
+ liger_kernel_nightly-0.6.0.dev20250709042125.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
93
+ liger_kernel_nightly-0.6.0.dev20250709042125.dist-info/METADATA,sha256=wPo1-0a5kmFfk7NvN28x3Sk_NV0g4AEzrOnbB4fJHMk,24535
94
+ liger_kernel_nightly-0.6.0.dev20250709042125.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
95
+ liger_kernel_nightly-0.6.0.dev20250709042125.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
96
+ liger_kernel_nightly-0.6.0.dev20250709042125.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
97
+ liger_kernel_nightly-0.6.0.dev20250709042125.dist-info/RECORD,,