liger-kernel 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
liger_kernel/transformers/monkey_patch.py
@@ -29,6 +29,7 @@ from liger_kernel.transformers.model.phi3 import lce_forward as phi3_lce_forward
  from liger_kernel.transformers.model.phi3 import lce_forward_deprecated as phi3_lce_forward_deprecated
  from liger_kernel.transformers.model.qwen2 import lce_forward as qwen2_lce_forward
  from liger_kernel.transformers.model.qwen2 import lce_forward_deprecated as qwen2_lce_forward_deprecated
+ from liger_kernel.transformers.model.smollm3 import lce_forward as smollm3_lce_forward
  from liger_kernel.transformers.qwen2vl_mrope import liger_multimodal_rotary_pos_emb
  from liger_kernel.transformers.rms_norm import LigerRMSNorm
  from liger_kernel.transformers.rope import liger_rotary_pos_emb
@@ -77,8 +78,8 @@ def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", i
          _bind_method_to_module(module.modules_to_save.default, "extra_repr", LigerRMSNorm.extra_repr)
          _bind_method_to_module(module.original_module, "forward", LigerRMSNorm.forward)
          _bind_method_to_module(module.original_module, "extra_repr", LigerRMSNorm.extra_repr)
-         module.modules_to_save.default.__class__.__name__ = LigerRMSNorm.__name__
-         module.original_module.__class__.__name__ = LigerRMSNorm.__name__
+         _bind_method_to_module(module.modules_to_save.default, "_get_name", lambda self: LigerRMSNorm.__name__)
+         _bind_method_to_module(module.original_module, "_get_name", lambda self: LigerRMSNorm.__name__)
      else:
          module.offset = offset
          module.casting_mode = casting_mode
@@ -87,7 +88,7 @@ def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", i
          module.row_mode = row_mode
          _bind_method_to_module(module, "forward", LigerRMSNorm.forward)
          _bind_method_to_module(module, "extra_repr", LigerRMSNorm.extra_repr)
-         module.__class__.__name__ = LigerRMSNorm.__name__
+         _bind_method_to_module(module, "_get_name", lambda self: LigerRMSNorm.__name__)


  def _patch_layer_norm_module(module, eps=1e-6):
@@ -109,28 +110,28 @@ def _patch_layer_norm_module(module, eps=1e-6):
          module.original_module.hidden_size = getattr(module, "hidden_size", None) or getattr(
              module, "normalized_shape", None
          )
-         _bind_method_to_module(module.modules_to_save.default, "forward", LigerRMSNorm.forward)
-         _bind_method_to_module(module.modules_to_save.default, "extra_repr", LigerRMSNorm.extra_repr)
-         _bind_method_to_module(module.original_module, "forward", LigerRMSNorm.forward)
-         _bind_method_to_module(module.original_module, "extra_repr", LigerRMSNorm.extra_repr)
-         module.modules_to_save.default.__class__.__name__ = LigerLayerNorm.__name__
-         module.original_module.__class__.__name__ = LigerLayerNorm.__name__
+         _bind_method_to_module(module.modules_to_save.default, "forward", LigerLayerNorm.forward)
+         _bind_method_to_module(module.modules_to_save.default, "extra_repr", LigerLayerNorm.extra_repr)
+         _bind_method_to_module(module.original_module, "forward", LigerLayerNorm.forward)
+         _bind_method_to_module(module.original_module, "extra_repr", LigerLayerNorm.extra_repr)
+         _bind_method_to_module(module.modules_to_save.default, "_get_name", lambda self: LigerLayerNorm.__name__)
+         _bind_method_to_module(module.original_module, "_get_name", lambda self: LigerLayerNorm.__name__)
      else:
          module.variance_epsilon = getattr(module, "variance_epsilon", None) or getattr(module, "eps", None) or eps
          module.hidden_size = getattr(module, "hidden_size", None) or getattr(module, "normalized_shape", None)
          _bind_method_to_module(module, "forward", LigerLayerNorm.forward)
          _bind_method_to_module(module, "extra_repr", LigerLayerNorm.extra_repr)
-         module.__class__.__name__ = LigerLayerNorm.__name__
+         _bind_method_to_module(module, "_get_name", lambda self: LigerLayerNorm.__name__)


  def _patch_swiglu_module(module, liger_module):
      _bind_method_to_module(module, "forward", liger_module.forward)
-     module.__class__.__name__ = liger_module.__name__
+     _bind_method_to_module(module, "_get_name", lambda self: liger_module.__name__)


  def _patch_geglu_module(module):
      _bind_method_to_module(module, "forward", LigerGEGLUMLP.forward)
-     module.__class__.__name__ = LigerGEGLUMLP.__name__
+     _bind_method_to_module(module, "_get_name", lambda self: LigerGEGLUMLP.__name__)


  def apply_liger_kernel_to_granite(
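The renaming hunks above all make the same change: 0.6.0 mutated `module.__class__.__name__`, which renames the class itself, so every live instance of that class (including unpatched modules elsewhere in the process) would print with the Liger name. 0.6.1 instead binds a per-instance `_get_name`, the hook `torch.nn.Module.__repr__` uses to obtain the printed name (it defaults to `self.__class__.__name__`), so only the patched module is renamed. A minimal sketch of the pattern, with `bind_method_to_module` as an assumed stand-in for the library's `_bind_method_to_module` helper:

```python
# Minimal sketch of per-instance renaming; `bind_method_to_module` is an
# assumed equivalent of liger_kernel's _bind_method_to_module helper.
import types

import torch.nn as nn


def bind_method_to_module(module, method_name, method):
    # Bind `method` to this one instance; the class itself is left untouched.
    setattr(module, method_name, types.MethodType(method, module))


patched, untouched = nn.LayerNorm(8), nn.LayerNorm(8)

# 0.6.0 style (renames the class, so `untouched` would be renamed too):
#   patched.__class__.__name__ = "LigerLayerNorm"

# 0.6.1 style: nn.Module.__repr__ calls self._get_name(), so overriding it
# per instance changes only this module's printed name.
bind_method_to_module(patched, "_get_name", lambda self: "LigerLayerNorm")

print(patched)    # LigerLayerNorm((8,), eps=1e-05, elementwise_affine=True)
print(untouched)  # LayerNorm((8,), eps=1e-05, elementwise_affine=True)
```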
@@ -290,6 +291,77 @@ def apply_liger_kernel_to_llama(
                  _patch_rms_norm_module(decoder_layer.post_attention_layernorm)


+ def apply_liger_kernel_to_smollm3(
+     rope: bool = True,
+     cross_entropy: bool = False,
+     fused_linear_cross_entropy: bool = True,
+     rms_norm: bool = True,
+     swiglu: bool = True,
+     model: PreTrainedModel = None,
+ ) -> None:
+     """
+     Apply Liger kernels to replace the original implementation in the HuggingFace SmolLM3 model.
+
+     Args:
+         rope (bool): Whether to apply Liger's rotary position embedding. Default is True.
+         cross_entropy (bool): Whether to apply Liger's cross entropy loss. Default is False.
+         fused_linear_cross_entropy (bool):
+             Whether to apply Liger's fused linear cross entropy loss. Default is True.
+             `cross_entropy` and `fused_linear_cross_entropy` cannot both be True.
+             If `fused_linear_cross_entropy` is True, the logits are not materialized, which is more memory efficient.
+         rms_norm (bool): Whether to apply Liger's RMSNorm. Default is True.
+         swiglu (bool): Whether to apply Liger's SwiGLU MLP. Default is True.
+         model (PreTrainedModel): The model instance to apply Liger kernels to, if the model has already been
+             loaded. Default is None.
+     """
+
+     assert not (cross_entropy and fused_linear_cross_entropy), (
+         "cross_entropy and fused_linear_cross_entropy cannot both be True."
+     )
+
+     from transformers.models.smollm3 import modeling_smollm3
+     from transformers.models.smollm3.modeling_smollm3 import SmolLM3Model
+
+     if rope:
+         modeling_smollm3.apply_rotary_pos_emb = liger_rotary_pos_emb
+     if rms_norm:
+         modeling_smollm3.SmolLM3RMSNorm = LigerRMSNorm
+     if swiglu:
+         modeling_smollm3.SmolLM3MLP = LigerSwiGLUMLP
+
+     if cross_entropy:
+         if transformer_version >= version.parse(SUPPORTED_TRANSFORMER_VERSION):
+             from transformers.loss.loss_utils import nn
+
+             nn.functional.cross_entropy = liger_cross_entropy
+         else:
+             logger.warning(TRANSFORMER_DEPRECATION_WARNING)
+             modeling_smollm3.CrossEntropyLoss = LigerCrossEntropyLoss
+
+     if fused_linear_cross_entropy:
+         if model is not None:
+             model.forward = MethodType(smollm3_lce_forward, model)
+         else:
+             modeling_smollm3.SmolLM3ForCausalLM.forward = smollm3_lce_forward
+
+     if model is not None:
+         # The model instance already exists, so we need to additionally patch the
+         # instance variables that reference already-instantiated modules (e.g. SmolLM3RMSNorm or SmolLM3MLP)
+
+         # get the base model from the model instance
+         base_model: SmolLM3Model = getattr(model, model.base_model_prefix, model)
+
+         if rms_norm:
+             _patch_rms_norm_module(base_model.norm)
+
+         for decoder_layer in base_model.layers:
+             if swiglu:
+                 _patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
+             if rms_norm:
+                 _patch_rms_norm_module(decoder_layer.input_layernorm)
+                 _patch_rms_norm_module(decoder_layer.post_attention_layernorm)
+
+
  def apply_liger_kernel_to_llava(
      cross_entropy: bool = False,
      fused_linear_cross_entropy: bool = True,
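The new SmolLM3 entry point follows the same contract as the other `apply_liger_kernel_to_*` functions: call it before the model is constructed to patch `modeling_smollm3` globally, or pass an already-loaded instance via `model=` to patch its modules in place. A hedged usage sketch; the checkpoint id is illustrative, and the top-level re-export from `liger_kernel.transformers` is assumed to match the library's convention for other models:

```python
# Hedged usage sketch: "HuggingFaceTB/SmolLM3-3B" is an illustrative checkpoint
# id, and the re-export from liger_kernel.transformers is assumed.
from transformers import AutoModelForCausalLM

from liger_kernel.transformers import apply_liger_kernel_to_smollm3

# Option 1: patch the modeling module before the model is created.
apply_liger_kernel_to_smollm3(rope=True, rms_norm=True, swiglu=True)
model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM3-3B")

# Option 2: patch an already-instantiated model in place via `model=`.
model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM3-3B")
apply_liger_kernel_to_smollm3(model=model)
```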
@@ -1801,6 +1873,7 @@ MODEL_TYPE_TO_APPLY_LIGER_FN = {
      "qwen2_vl_text": apply_liger_kernel_to_qwen2_vl,
      "qwen2_5_vl": apply_liger_kernel_to_qwen2_5_vl,
      "qwen2_5_vl_text": apply_liger_kernel_to_qwen2_5_vl,
+     "smollm3": apply_liger_kernel_to_smollm3,
      "phi3": apply_liger_kernel_to_phi3,
      "paligemma": apply_liger_kernel_to_paligemma,
  }
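Registering `"smollm3"` here lets the generic machinery dispatch on `model.config.model_type` instead of requiring the model-specific call. A simplified, hedged sketch of that lookup; the helper below is a stand-in, not the library's exact code path, which runs through helpers such as `AutoLigerKernelForCausalLM`:

```python
# Stand-in sketch of registry-driven dispatch over MODEL_TYPE_TO_APPLY_LIGER_FN.
from transformers import AutoConfig

from liger_kernel.transformers.monkey_patch import MODEL_TYPE_TO_APPLY_LIGER_FN


def apply_liger_kernel_by_model_type(model_type: str, **kwargs) -> None:
    # Look up the per-model patch function registered for this model type.
    apply_fn = MODEL_TYPE_TO_APPLY_LIGER_FN.get(model_type)
    if apply_fn is None:
        raise ValueError(f"No Liger kernel patch registered for {model_type!r}")
    apply_fn(**kwargs)


config = AutoConfig.from_pretrained("HuggingFaceTB/SmolLM3-3B")  # illustrative id
apply_liger_kernel_by_model_type(config.model_type)  # "smollm3" as of 0.6.1
```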
liger_kernel-0.6.0.dist-info/METADATA → liger_kernel-0.6.1.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: liger_kernel
- Version: 0.6.0
+ Version: 0.6.1
  Summary: Efficient Triton kernels for LLM Training
  License: BSD 2-CLAUSE LICENSE
  Copyright 2024 LinkedIn Corporation
@@ -84,7 +84,7 @@ Dynamic: requires-dist
  </td>
  <td style="padding: 10px;">
      <a href="https://discord.gg/gpumode">
-         <img src="https://dcbadge.vercel.app/api/server/gpumode?style=flat" alt="Join Our Discord">
+         <img src="https://dcbadge.limes.pink/api/server/gpumode?style=flat" alt="Join Our Discord">
      </a>
  </td>
  </tr>
@@ -307,7 +307,7 @@ loss.backward()
  | Qwen2-VL, & QVQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_vl` | RMSNorm, LayerNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
  | Qwen2.5-VL | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_5_vl` | RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
  | Qwen3 | `liger_kernel.transformers.apply_liger_kernel_to_qwen3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
- | Qwen3 MoE | `liger_kernel_transformers.apply_liger_kernel_to_qwen3_moe` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
+ | Qwen3 MoE | `liger_kernel.transformers.apply_liger_kernel_to_qwen3_moe` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
  | Phi3 & Phi3.5 | `liger_kernel.transformers.apply_liger_kernel_to_phi3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
  | Granite 3.0 & 3.1 | `liger_kernel.transformers.apply_liger_kernel_to_granite` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss |
  | OLMo2 | `liger_kernel.transformers.apply_liger_kernel_to_olmo2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
@@ -414,21 +414,19 @@ loss.backward()
  - For issues, create a Github ticket in this repository
  - For open discussion, join [our discord channel on GPUMode](https://discord.com/channels/1189498204333543425/1275130785933951039)
- - For formal collaboration, send an email to yannchen@linkedin.com and hning@linkedin.com
+ - For formal collaboration, send an email to Yanning Chen (yannchen@linkedin.com) and Zhipeng Wang (zhipwang@linkedin.com)

  ## Cite this work

  Biblatex entry:
  ```bib
- @article{hsu2024ligerkernelefficienttriton,
-     title={Liger Kernel: Efficient Triton Kernels for LLM Training},
-     author={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen},
-     year={2024},
-     eprint={2410.10989},
-     archivePrefix={arXiv},
-     primaryClass={cs.LG},
-     url={https://arxiv.org/abs/2410.10989},
-     journal={arXiv preprint arXiv:2410.10989},
+ @inproceedings{
+     hsu2025ligerkernel,
+     title={Liger-Kernel: Efficient Triton Kernels for {LLM} Training},
+     author={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen and Zhipeng Wang},
+     booktitle={Championing Open-source DEvelopment in ML Workshop @ ICML25},
+     year={2025},
+     url={https://openreview.net/forum?id=36SjAIT42G}
  }
  ```
liger_kernel-0.6.0.dist-info/RECORD → liger_kernel-0.6.1.dist-info/RECORD
@@ -19,6 +19,7 @@ liger_kernel/chunked_loss/simpo_loss.py,sha256=fy2w8KbhMrBv7b1jdIeH3bBFxY52bPQPZ
  liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  liger_kernel/ops/cross_entropy.py,sha256=e8THGnhOcy_0SbOLABx67HEM7-B8a8pG7nDKbCRpQKM,19123
  liger_kernel/ops/dyt.py,sha256=gCLz4S8aul8SY9nvIGaoK67aGb7U9MJRQdo3ONqmQYs,5417
+ liger_kernel/ops/fused_add_rms_norm.py,sha256=UBqmlqFCmhSAIpkNKd8rrfXatX7Z4J9bp2dX9A0lrJQ,14017
  liger_kernel/ops/fused_linear_cross_entropy.py,sha256=5fbGhN85n3zf0uIdJ7PYHWIRzTf0VTFiS0ARtOmqIP0,11020
  liger_kernel/ops/fused_linear_jsd.py,sha256=CSoprxb-YcJy-YUKiTcYkxN8sb9h2kdk_iHuncvSV5c,9683
  liger_kernel/ops/fused_neighborhood_attention.py,sha256=vPi5xbnh6wxyZehaqo6Tuilqo2fN5SGDiONjnNmIKqs,35556
@@ -27,11 +28,11 @@ liger_kernel/ops/group_norm.py,sha256=qD4D4lSjSgVtO52EBNLC2iTseALRgPgqXE50U2wogg
  liger_kernel/ops/grpo_loss.py,sha256=anRnv7k1-AV3pCC6_TqP0GMg78YYUfRAJrbpx6PVhl0,9448
  liger_kernel/ops/jsd.py,sha256=onHp5T3MbvJaVz5Vup7Ww6EQp_HTaZeayTjJk6FgQMY,7042
  liger_kernel/ops/kl_div.py,sha256=ZjGdDLKWksHT9dZ0xF_TDgAkj5cuMTwwT5tr9E-_24o,8734
- liger_kernel/ops/layer_norm.py,sha256=vWCyOm-F2GMAilB-ozJcFeUQQLCJoTE_uiXq-_0uYuI,8356
+ liger_kernel/ops/layer_norm.py,sha256=BHPDuaogMTfIJkBJdqLZbOQouNWTf3fJVyOQOD7blCE,9901
  liger_kernel/ops/multi_token_attention.py,sha256=Oz_RXDp-OSS_R_HuGmaETHdAJ7Toda_70OfE7TXMUlY,7645
  liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
- liger_kernel/ops/rms_norm.py,sha256=-rcgHwWCxlA-Syec2XhdW4jfOeCDt2r7qwjslgXFYDU,18865
- liger_kernel/ops/rope.py,sha256=ofmBOkUpZZO-Q8Z5B_LOFYYLD-YT-8WnJ4vGOrDYouI,8943
+ liger_kernel/ops/rms_norm.py,sha256=DtvsWN5YktFAoc0JYSAwVeoZfryBFJlX-ipU7ooP01A,18891
+ liger_kernel/ops/rope.py,sha256=v-7JHRrv-5ImoROkpKfl30WwWI4qTa2tAl7zQeB4ml4,8956
  liger_kernel/ops/softmax.py,sha256=tgORx6MK1IDDtZKqGarj0IPIVjqAIEUXXYPiinhRdtI,5864
  liger_kernel/ops/sparsemax.py,sha256=AeWe1xgkHJFEKWTj2vu_0hj7LztGvjqXAps-QTpCY0U,5087
  liger_kernel/ops/swiglu.py,sha256=D7nd4u_LInwsIRNCDdY77lqnTz8-W5dJrpEAt8zEO_A,3033
@@ -39,12 +40,13 @@ liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
  liger_kernel/ops/utils.py,sha256=uoFKQqo-34N2TWQNvXMFywqGiOMMXNEVBxVojzlUAa0,3836
  liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
  liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
- liger_kernel/transformers/__init__.py,sha256=mWMEhOabqUkPimMOmkg9DawnO-vL9u_u-N4iIqfNZeg,7259
+ liger_kernel/transformers/__init__.py,sha256=VoHQp5emsAJAouql37RuvtGFeZCoMIHgoIxfsyYMTc8,7564
  liger_kernel/transformers/auto_model.py,sha256=0qCTRZt280Bj_LcFdzo9hlaR-BWNazawXOGgoCZjgEg,1545
  liger_kernel/transformers/cross_entropy.py,sha256=z3KTWQnFxr_IZaVjtYt0ZNEWQdDdYThN35xWkHlDGH0,1683
  liger_kernel/transformers/dyt.py,sha256=i-4GPaMrl-jab9TVI5qN0-H9qycn_mCbV82ozU4nbmU,723
  liger_kernel/transformers/fsdp.py,sha256=CUiyjTmjkjY7pLXQv8ly9rnzgXw6529csd9pvtJNMYc,3096
- liger_kernel/transformers/functional.py,sha256=7Emw7D6VPMg8hfasC33NiolvKmQVF1gV6VayKQCEWJM,7446
+ liger_kernel/transformers/functional.py,sha256=PXnACWD7kzgge50RdOUuvtmOTS7DVkkrL7mm0cX5bOc,7734
+ liger_kernel/transformers/fused_add_rms_norm.py,sha256=7_Bzg-x6lLe6W1qG2DtjDALhEpNZlC6N5GppEs9cTYY,1199
  liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=O8Sg5BT81nTaY9fSGoOY9dOD9ekibwwiuXhdUHaxntQ,1742
  liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
  liger_kernel/transformers/fused_neighborhood_attention.py,sha256=TxYDUAt9B6WSP14aJP66C_2Mbds2sSIPGnamhUSTrC8,7957
@@ -54,7 +56,7 @@ liger_kernel/transformers/grpo_loss.py,sha256=uAkUNKSnUGEOqa82L9w2e6AI1kcmG8K45-
  liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
  liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
  liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
- liger_kernel/transformers/monkey_patch.py,sha256=W7KgJN-rrLZS3pRZ5debO_dSN7zddPegKjqOIP39wR0,85856
+ liger_kernel/transformers/monkey_patch.py,sha256=tXKo4EKVp3szpdqPh051oLZFrlg_hCbWRv0RpSX_kfY,89238
  liger_kernel/transformers/multi_token_attention.py,sha256=l9VDICK0dfmifUDW668hGscP8AHq2rYcM2oGUa3baRQ,1751
  liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
  liger_kernel/transformers/rms_norm.py,sha256=vkekcvTeWY8vL4H6hg3t0XeY0Ew_3OFMPHuzqlxPPVw,2719
@@ -68,7 +70,7 @@ liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1w
  liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  liger_kernel/transformers/model/gemma.py,sha256=mNX-mIwV6jI4zfbrUHp0C468pOmjzsL7mjXipGt-eS0,10007
  liger_kernel/transformers/model/gemma2.py,sha256=R_JFPyWTk7RyA7D05ZiIaNO5pX8gWcvfWf-6rdCRMxs,11296
- liger_kernel/transformers/model/gemma3.py,sha256=XbwoqOSPmtS0BPHgT8jZftTzplmiAicgBa6ocNcet8o,12800
+ liger_kernel/transformers/model/gemma3.py,sha256=FKO4j3t4W_5uECRA1lhVnXC-It2GhirHm4tpCf9ApAc,12785
  liger_kernel/transformers/model/glm4.py,sha256=GlnEhdGJuDIqp2R9qC54biY3HwV1tWmfpJm6ijoAsrM,5257
  liger_kernel/transformers/model/llama.py,sha256=i8jJgyZsMKWQ-zKloETLugtwFpUOdaWxLDceciFXKd4,12832
  liger_kernel/transformers/model/llama4.py,sha256=IgbB8sTh3dlETQnaNNy1bZLuXy-Nt7qmeAjF27ydGpg,4210
@@ -85,13 +87,14 @@ liger_kernel/transformers/model/qwen2_5_vl.py,sha256=zEVVwotCXnAm3RRc8-1Nc8uitSW
  liger_kernel/transformers/model/qwen2_vl.py,sha256=5vK-vtCDpKZ2w33xYp2BS8kQYWUbKMqaiKvQcI27Mss,5884
  liger_kernel/transformers/model/qwen3.py,sha256=w2jBHuK9kK9EmOr5dnEIXNQXUgUSV_sJUkXSEwxLPHs,4885
  liger_kernel/transformers/model/qwen3_moe.py,sha256=BkpfFH3fOH0yRfA7LF-AoHTLut2GV0Y4MOlkiIYewfU,5511
+ liger_kernel/transformers/model/smollm3.py,sha256=mqayvpwpMbp2yd_Ue7IPzy-dA4KHSDi_ROZW5vHCHfQ,7596
  liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
  liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
  liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
  liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
- liger_kernel-0.6.0.dist-info/licenses/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
- liger_kernel-0.6.0.dist-info/licenses/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
- liger_kernel-0.6.0.dist-info/METADATA,sha256=YQs0IFuj3o4GPiiDJ6K2s_HqIIWTv8SvQLVU_tPRwGY,24578
- liger_kernel-0.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- liger_kernel-0.6.0.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
- liger_kernel-0.6.0.dist-info/RECORD,,
+ liger_kernel-0.6.1.dist-info/licenses/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
+ liger_kernel-0.6.1.dist-info/licenses/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
+ liger_kernel-0.6.1.dist-info/METADATA,sha256=_of0e7dKufrp2upc26bnv4VLBZvAbcdDA8Fssm3mIfk,24545
+ liger_kernel-0.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ liger_kernel-0.6.1.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
+ liger_kernel-0.6.1.dist-info/RECORD,,