liger-kernel-nightly 0.6.1.dev20250812205818__py3-none-any.whl → 0.6.1.dev20250819172918__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -10,11 +10,15 @@ from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinea
10
10
  from liger_kernel.transformers.fused_linear_jsd import LigerFusedLinearJSD # noqa: F401
11
11
  from liger_kernel.transformers.geglu import LigerGEGLUMLP # noqa: F401
12
12
  from liger_kernel.transformers.jsd import LigerJSD # noqa: F401
13
+ from liger_kernel.transformers.kl_div import LigerKLDIVLoss # noqa: F401
13
14
  from liger_kernel.transformers.layer_norm import LigerLayerNorm # noqa: F401
14
15
  from liger_kernel.transformers.llama4_rope import liger_llama4_text_rotary_pos_emb # noqa: F401
15
16
  from liger_kernel.transformers.llama4_rope import liger_llama4_vision_rotary_pos_emb # noqa: F401
17
+ from liger_kernel.transformers.multi_token_attention import LigerMultiTokenAttention # noqa: F401
16
18
  from liger_kernel.transformers.rms_norm import LigerRMSNorm # noqa: F401
17
19
  from liger_kernel.transformers.rope import liger_rotary_pos_emb # noqa: F401
20
+ from liger_kernel.transformers.softmax import LigerSoftmax # noqa: F401
21
+ from liger_kernel.transformers.sparsemax import LigerSparsemax # noqa: F401
18
22
  from liger_kernel.transformers.swiglu import LigerBlockSparseTop2MLP # noqa: F401
19
23
  from liger_kernel.transformers.swiglu import LigerPhi3SwiGLUMLP # noqa: F401
20
24
  from liger_kernel.transformers.swiglu import LigerQwen3MoeSwiGLUMLP # noqa: F401
@@ -134,6 +138,10 @@ __all__ = [
134
138
  "LigerQwen3MoeSwiGLUMLP",
135
139
  "LigerSwiGLUMLP",
136
140
  "LigerTVDLoss",
141
+ "LigerKLDIVLoss",
142
+ "LigerMultiTokenAttention",
143
+ "LigerSoftmax",
144
+ "LigerSparsemax",
137
145
  ]
138
146
 
139
147
  # Add transformer-dependent symbols only if available
@@ -0,0 +1,5 @@
1
+ from liger_kernel.transformers.experimental.embedding import LigerEmbedding # noqa: F401
2
+
3
+ __all__ = [
4
+ "LigerEmbedding",
5
+ ]
@@ -190,7 +190,9 @@ def lce_forward(
190
190
  output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
191
191
  )
192
192
  return_dict = return_dict if return_dict is not None else self.config.use_return_dict
193
-
193
+ # Filter out accum_dtype from kwargs for model call as MllamaTextModel doesn't accept it in transformers 4.49.0
194
+ # but preserve it for loss function calls
195
+ model_kwargs = {k: v for k, v in kwargs.items() if k != "accum_dtype"}
194
196
  # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
195
197
  outputs = self.model(
196
198
  input_ids=input_ids,
@@ -206,7 +208,7 @@ def lce_forward(
206
208
  output_hidden_states=output_hidden_states,
207
209
  return_dict=return_dict,
208
210
  cache_position=cache_position,
209
- **kwargs,
211
+ **model_kwargs,
210
212
  )
211
213
 
212
214
  hidden_states = outputs[0]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.1.dev20250812205818
3
+ Version: 0.6.1.dev20250819172918
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -41,7 +41,7 @@ liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
41
41
  liger_kernel/ops/utils.py,sha256=uoFKQqo-34N2TWQNvXMFywqGiOMMXNEVBxVojzlUAa0,3836
42
42
  liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
43
43
  liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
44
- liger_kernel/transformers/__init__.py,sha256=YQ3ffAaZWLy266snmFFHHfoz4EX1AhcSfojXZhOs6h0,7842
44
+ liger_kernel/transformers/__init__.py,sha256=S4CuPoqoU1GZ97-4JHGSxBInBA4txaYUlpn624cfHRk,8261
45
45
  liger_kernel/transformers/auto_model.py,sha256=0qCTRZt280Bj_LcFdzo9hlaR-BWNazawXOGgoCZjgEg,1545
46
46
  liger_kernel/transformers/cross_entropy.py,sha256=z3KTWQnFxr_IZaVjtYt0ZNEWQdDdYThN35xWkHlDGH0,1683
47
47
  liger_kernel/transformers/dyt.py,sha256=i-4GPaMrl-jab9TVI5qN0-H9qycn_mCbV82ozU4nbmU,723
@@ -68,6 +68,7 @@ liger_kernel/transformers/sparsemax.py,sha256=0lQA0UEOs4mu8CMruZ3VLhImxQVXJWhPsA
68
68
  liger_kernel/transformers/swiglu.py,sha256=LZ8YeLIdv2k46JleZMjzubGk98smt6t780kSgcVLsQk,3454
69
69
  liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
70
70
  liger_kernel/transformers/tvd.py,sha256=XrRfyJIqN6HFxXk8MYyFVZM1OLz3mtSbRZvWfZ_JerQ,450
71
+ liger_kernel/transformers/experimental/__init__.py,sha256=oQqk-f32JYgWEP9DJCj6ty6bbJSGrdXsFDQFwGeX6vI,127
71
72
  liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1wcRgDSyjBMv5i1a7BrDPDQw,881
72
73
  liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
74
  liger_kernel/transformers/model/gemma.py,sha256=mNX-mIwV6jI4zfbrUHp0C468pOmjzsL7mjXipGt-eS0,10007
@@ -80,7 +81,7 @@ liger_kernel/transformers/model/llava.py,sha256=bLCioday_SOm69ogMDBhy_4UsVkH2-BS
80
81
  liger_kernel/transformers/model/loss_utils.py,sha256=YiYsmRHIuoRnFjGpwyIM18DCsrPPmO32YWMWqkEm1UQ,1867
81
82
  liger_kernel/transformers/model/mistral.py,sha256=syYNL8dLThX2-4uC13Lu0krEZ5zw3InviDUR3AJmc-I,5500
82
83
  liger_kernel/transformers/model/mixtral.py,sha256=VY-y73IyjcCyWyI7ahxXLw0fJrhgjYfr1xwRYtsHX0o,11396
83
- liger_kernel/transformers/model/mllama.py,sha256=my29NXk-p6ckQaP8qDIN8e318yI_9mQZHt38MV3SqLY,11280
84
+ liger_kernel/transformers/model/mllama.py,sha256=NhJtlXiuszJHo5YSJOvSGYH47ly7Hse8r-5BKznBg9s,11522
84
85
  liger_kernel/transformers/model/olmo2.py,sha256=6L_bo-ZUgO1lYppdJneOtYxNIylQKS6BiGp13g7Uq9E,5259
85
86
  liger_kernel/transformers/model/paligemma.py,sha256=xuIx3oOwTgftU3jqLfWOxUxgCLBNJh0yNC21an9qDjo,18773
86
87
  liger_kernel/transformers/model/phi3.py,sha256=AwScxUe3LjmHHyQg4gW9bMoUI7uA6fUEMXJ3YhBiHtQ,4046
@@ -94,9 +95,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
94
95
  liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
95
96
  liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
96
97
  liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
97
- liger_kernel_nightly-0.6.1.dev20250812205818.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
98
- liger_kernel_nightly-0.6.1.dev20250812205818.dist-info/METADATA,sha256=1s-igyDBWH7I09Q1f-7-h5BtStzEN30M_ffGQk5ZE4M,24504
99
- liger_kernel_nightly-0.6.1.dev20250812205818.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
100
- liger_kernel_nightly-0.6.1.dev20250812205818.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
101
- liger_kernel_nightly-0.6.1.dev20250812205818.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
102
- liger_kernel_nightly-0.6.1.dev20250812205818.dist-info/RECORD,,
98
+ liger_kernel_nightly-0.6.1.dev20250819172918.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
99
+ liger_kernel_nightly-0.6.1.dev20250819172918.dist-info/METADATA,sha256=TcTw20UYY0RPi33LI2BJMrQM1IflGN-V1qwGI0Lnz5g,24504
100
+ liger_kernel_nightly-0.6.1.dev20250819172918.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
101
+ liger_kernel_nightly-0.6.1.dev20250819172918.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
102
+ liger_kernel_nightly-0.6.1.dev20250819172918.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
103
+ liger_kernel_nightly-0.6.1.dev20250819172918.dist-info/RECORD,,