liger-kernel 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- liger_kernel/ops/fused_add_rms_norm.py +412 -0
- liger_kernel/ops/layer_norm.py +126 -89
- liger_kernel/ops/rms_norm.py +2 -2
- liger_kernel/ops/rope.py +1 -1
- liger_kernel/transformers/__init__.py +5 -0
- liger_kernel/transformers/functional.py +5 -0
- liger_kernel/transformers/fused_add_rms_norm.py +39 -0
- liger_kernel/transformers/model/gemma3.py +1 -1
- liger_kernel/transformers/model/smollm3.py +189 -0
- liger_kernel/transformers/monkey_patch.py +85 -12
- {liger_kernel-0.6.0.dist-info → liger_kernel-0.6.1.dist-info}/METADATA +11 -13
- {liger_kernel-0.6.0.dist-info → liger_kernel-0.6.1.dist-info}/RECORD +16 -13
- {liger_kernel-0.6.0.dist-info → liger_kernel-0.6.1.dist-info}/WHEEL +0 -0
- {liger_kernel-0.6.0.dist-info → liger_kernel-0.6.1.dist-info}/licenses/LICENSE +0 -0
- {liger_kernel-0.6.0.dist-info → liger_kernel-0.6.1.dist-info}/licenses/NOTICE +0 -0
- {liger_kernel-0.6.0.dist-info → liger_kernel-0.6.1.dist-info}/top_level.txt +0 -0
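
The largest addition by line count is a fused residual-add + RMSNorm operator (`liger_kernel/ops/fused_add_rms_norm.py`, with a thin wrapper in `liger_kernel/transformers/fused_add_rms_norm.py`); the rest of the release wires up SmolLM3 support and changes how patched modules report their names. As a reference for what an add + RMSNorm fusion computes, here is a minimal eager-mode sketch; the function name and signature are assumptions for illustration, and the shipped operator is a Triton kernel, not eager PyTorch:

```python
import torch


def fused_add_rms_norm_reference(
    hidden: torch.Tensor,    # (..., hidden_size) sub-layer output
    residual: torch.Tensor,  # (..., hidden_size) running residual stream
    weight: torch.Tensor,    # (hidden_size,) learned RMSNorm scale
    eps: float = 1e-6,
) -> tuple[torch.Tensor, torch.Tensor]:
    # Step 1: the residual addition (a separate elementwise kernel when unfused).
    added = hidden + residual
    # Step 2: RMSNorm over the last dimension (another kernel when unfused).
    variance = added.pow(2).mean(dim=-1, keepdim=True)
    normed = added * torch.rsqrt(variance + eps) * weight
    # Both tensors are returned so the caller can feed `added` back in as the
    # next layer's residual; skipping the extra round-trip through memory for
    # `added` is what makes fusing the two steps pay off.
    return normed, added


out, new_residual = fused_add_rms_norm_reference(
    torch.randn(2, 4, 8), torch.randn(2, 4, 8), torch.ones(8)
)
```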
liger_kernel/transformers/monkey_patch.py

```diff
@@ -29,6 +29,7 @@ from liger_kernel.transformers.model.phi3 import lce_forward as phi3_lce_forward
 from liger_kernel.transformers.model.phi3 import lce_forward_deprecated as phi3_lce_forward_deprecated
 from liger_kernel.transformers.model.qwen2 import lce_forward as qwen2_lce_forward
 from liger_kernel.transformers.model.qwen2 import lce_forward_deprecated as qwen2_lce_forward_deprecated
+from liger_kernel.transformers.model.smollm3 import lce_forward as smollm3_lce_forward
 from liger_kernel.transformers.qwen2vl_mrope import liger_multimodal_rotary_pos_emb
 from liger_kernel.transformers.rms_norm import LigerRMSNorm
 from liger_kernel.transformers.rope import liger_rotary_pos_emb
@@ -77,8 +78,8 @@ def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", i
         _bind_method_to_module(module.modules_to_save.default, "extra_repr", LigerRMSNorm.extra_repr)
         _bind_method_to_module(module.original_module, "forward", LigerRMSNorm.forward)
         _bind_method_to_module(module.original_module, "extra_repr", LigerRMSNorm.extra_repr)
-        module.modules_to_save.default._get_name = lambda: LigerRMSNorm.__name__
-        module.original_module._get_name = lambda: LigerRMSNorm.__name__
+        _bind_method_to_module(module.modules_to_save.default, "_get_name", lambda self: LigerRMSNorm.__name__)
+        _bind_method_to_module(module.original_module, "_get_name", lambda self: LigerRMSNorm.__name__)
     else:
         module.offset = offset
         module.casting_mode = casting_mode
@@ -87,7 +88,7 @@ def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", i
         module.row_mode = row_mode
         _bind_method_to_module(module, "forward", LigerRMSNorm.forward)
         _bind_method_to_module(module, "extra_repr", LigerRMSNorm.extra_repr)
-        module._get_name = lambda: LigerRMSNorm.__name__
+        _bind_method_to_module(module, "_get_name", lambda self: LigerRMSNorm.__name__)
 
 
 def _patch_layer_norm_module(module, eps=1e-6):
@@ -109,28 +110,28 @@ def _patch_layer_norm_module(module, eps=1e-6):
         module.original_module.hidden_size = getattr(module, "hidden_size", None) or getattr(
             module, "normalized_shape", None
         )
-        _bind_method_to_module(module.modules_to_save.default, "forward", LigerLayerNorm.forward)
-        _bind_method_to_module(module.modules_to_save.default, "extra_repr", LigerLayerNorm.extra_repr)
-        _bind_method_to_module(module.original_module, "forward", LigerLayerNorm.forward)
-        _bind_method_to_module(module.original_module, "extra_repr", LigerLayerNorm.extra_repr)
-        module.modules_to_save.default._get_name = lambda: LigerLayerNorm.__name__
-        module.original_module._get_name = lambda: LigerLayerNorm.__name__
+        _bind_method_to_module(module.modules_to_save.default, "forward", LigerLayerNorm.forward)
+        _bind_method_to_module(module.modules_to_save.default, "extra_repr", LigerLayerNorm.extra_repr)
+        _bind_method_to_module(module.original_module, "forward", LigerLayerNorm.forward)
+        _bind_method_to_module(module.original_module, "extra_repr", LigerLayerNorm.extra_repr)
+        _bind_method_to_module(module.modules_to_save.default, "_get_name", lambda self: LigerLayerNorm.__name__)
+        _bind_method_to_module(module.original_module, "_get_name", lambda self: LigerLayerNorm.__name__)
     else:
         module.variance_epsilon = getattr(module, "variance_epsilon", None) or getattr(module, "eps", None) or eps
         module.hidden_size = getattr(module, "hidden_size", None) or getattr(module, "normalized_shape", None)
         _bind_method_to_module(module, "forward", LigerLayerNorm.forward)
         _bind_method_to_module(module, "extra_repr", LigerLayerNorm.extra_repr)
-        module._get_name = lambda: LigerLayerNorm.__name__
+        _bind_method_to_module(module, "_get_name", lambda self: LigerLayerNorm.__name__)
 
 
 def _patch_swiglu_module(module, liger_module):
     _bind_method_to_module(module, "forward", liger_module.forward)
-    module._get_name = lambda: liger_module.__name__
+    _bind_method_to_module(module, "_get_name", lambda self: liger_module.__name__)
 
 
 def _patch_geglu_module(module):
     _bind_method_to_module(module, "forward", LigerGEGLUMLP.forward)
-    module._get_name = lambda: LigerGEGLUMLP.__name__
+    _bind_method_to_module(module, "_get_name", lambda self: LigerGEGLUMLP.__name__)
 
 
 def apply_liger_kernel_to_granite(
```
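
The change running through `_patch_rms_norm_module`, `_patch_layer_norm_module`, `_patch_swiglu_module`, and `_patch_geglu_module` is the same: `_get_name` used to be overridden by assigning a zero-argument lambda onto the instance, and is now routed through `_bind_method_to_module` like `forward` and `extra_repr`, so the override is a real bound method that receives `self`. A small sketch of the distinction, assuming `_bind_method_to_module` is essentially a `types.MethodType` wrapper (the helper's body is outside this diff):

```python
from types import MethodType

import torch.nn as nn


def _bind_method_to_module(module, method_name, new_method):
    # Assumed shape of the helper: attach `new_method` to this one instance
    # as a bound method, so it is invoked as new_method(module, ...).
    setattr(module, method_name, MethodType(new_method, module))


norm = nn.LayerNorm(8)

# 0.6.0 pattern: a bare zero-arg lambda in the instance dict. It works when
# nn.Module.__repr__ calls self._get_name(), but it is a plain function on
# the instance, not a bound method, and takes no `self`.
norm._get_name = lambda: "LigerLayerNorm"
print(repr(norm))  # LigerLayerNorm((8,), eps=1e-05, elementwise_affine=True)

# 0.6.1 pattern: bound the same way as the other patched methods, with a
# uniform `self`-taking signature.
_bind_method_to_module(norm, "_get_name", lambda self: "LigerLayerNorm")
print(repr(norm))  # same output, now via a proper bound method
```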
liger_kernel/transformers/monkey_patch.py (continued)

```diff
@@ -290,6 +291,77 @@ def apply_liger_kernel_to_llama(
             _patch_rms_norm_module(decoder_layer.post_attention_layernorm)
 
 
+def apply_liger_kernel_to_smollm3(
+    rope: bool = True,
+    cross_entropy: bool = False,
+    fused_linear_cross_entropy: bool = True,
+    rms_norm: bool = True,
+    swiglu: bool = True,
+    model: PreTrainedModel = None,
+) -> None:
+    """
+    Apply Liger kernels to replace original implementation in HuggingFace SmolLM3 model
+
+    Args:
+        rope (bool): Whether to apply Liger's rotary position embedding. Default is True.
+        cross_entropy (bool): Whether to apply Liger's cross entropy loss. Default is False.
+        fused_linear_cross_entropy (bool):
+            Whether to apply Liger's fused linear cross entropy loss. Default is True.
+            `cross_entropy` and `fused_linear_cross_entropy` cannot both be True.
+            If `fused_linear_cross_entropy` is True, the logits will not be materialized but more memory efficient.
+        rms_norm (bool): Whether to apply Liger's RMSNorm. Default is True.
+        swiglu (bool): Whether to apply Liger's SwiGLU MLP. Default is True.
+        model (PreTrainedModel): The model instance to apply Liger kernels to, if the model has already been
+        loaded. Default is None.
+    """
+
+    assert not (cross_entropy and fused_linear_cross_entropy), (
+        "cross_entropy and fused_linear_cross_entropy cannot both be True."
+    )
+
+    from transformers.models.smollm3 import modeling_smollm3
+    from transformers.models.smollm3.modeling_smollm3 import SmolLM3Model
+
+    if rope:
+        modeling_smollm3.apply_rotary_pos_emb = liger_rotary_pos_emb
+    if rms_norm:
+        modeling_smollm3.SmolLM3RMSNorm = LigerRMSNorm
+    if swiglu:
+        modeling_smollm3.SmolLM3MLP = LigerSwiGLUMLP
+
+    if cross_entropy:
+        if transformer_version >= version.parse(SUPPORTED_TRANSFORMER_VERSION):
+            from transformers.loss.loss_utils import nn
+
+            nn.functional.cross_entropy = liger_cross_entropy
+        else:
+            logger.warning(TRANSFORMER_DEPRECATION_WARNING)
+            modeling_smollm3.CrossEntropyLoss = LigerCrossEntropyLoss
+
+    if fused_linear_cross_entropy:
+        if model is not None:
+            model.forward = MethodType(smollm3_lce_forward, model)
+        else:
+            modeling_smollm3.SmolLM3ForCausalLM.forward = smollm3_lce_forward
+
+    if model is not None:
+        # The model instance already exists, so we need to additionally patch the
+        # instance variables that reference already-instantiated modules (e.g. SmolLM3RMSNorm or SmolLM3MLP)
+
+        # get the base model from the model instance
+        base_model: SmolLM3Model = getattr(model, model.base_model_prefix, model)
+
+        if rms_norm:
+            _patch_rms_norm_module(base_model.norm)
+
+        for decoder_layer in base_model.layers:
+            if swiglu:
+                _patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
+            if rms_norm:
+                _patch_rms_norm_module(decoder_layer.input_layernorm)
+                _patch_rms_norm_module(decoder_layer.post_attention_layernorm)
+
+
 def apply_liger_kernel_to_llava(
     cross_entropy: bool = False,
     fused_linear_cross_entropy: bool = True,
```
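
A usage sketch for the new entry point; the import path assumes the five added lines in `liger_kernel/transformers/__init__.py` re-export it, and the checkpoint name is only an example:

```python
from transformers import AutoModelForCausalLM

from liger_kernel.transformers import apply_liger_kernel_to_smollm3

# Example checkpoint; any SmolLM3 causal-LM checkpoint would do.
model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM3-3B")

# Passing `model` takes the branch above that patches already-instantiated
# modules in place (the RMSNorm layers and MLPs) and rebinds forward to the
# fused-linear-cross-entropy version.
apply_liger_kernel_to_smollm3(
    rope=True,
    rms_norm=True,
    swiglu=True,
    fused_linear_cross_entropy=True,
    model=model,
)
```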
liger_kernel/transformers/monkey_patch.py (continued)

```diff
@@ -1801,6 +1873,7 @@ MODEL_TYPE_TO_APPLY_LIGER_FN = {
     "qwen2_vl_text": apply_liger_kernel_to_qwen2_vl,
     "qwen2_5_vl": apply_liger_kernel_to_qwen2_5_vl,
     "qwen2_5_vl_text": apply_liger_kernel_to_qwen2_5_vl,
+    "smollm3": apply_liger_kernel_to_smollm3,
     "phi3": apply_liger_kernel_to_phi3,
     "paligemma": apply_liger_kernel_to_paligemma,
 }
```
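
Registering `"smollm3"` in `MODEL_TYPE_TO_APPLY_LIGER_FN` is what lets config-driven auto-patching find the new function from a config's `model_type` instead of requiring an explicit model-specific call. A rough sketch of that dispatch pattern (the real lookup helper lives elsewhere in `monkey_patch.py` and is not part of this diff):

```python
from typing import Callable, Dict

# Stand-in for the table above; in the package it maps every supported
# architecture's `model_type` string to its apply_liger_kernel_to_* function.
MODEL_TYPE_TO_APPLY_LIGER_FN: Dict[str, Callable[..., None]] = {
    "smollm3": lambda **kwargs: print("patching smollm3 with", kwargs),
}


def _apply_liger_kernel(model_type: str, **kwargs) -> None:
    # Hypothetical dispatcher: unsupported model types fall through untouched.
    apply_fn = MODEL_TYPE_TO_APPLY_LIGER_FN.get(model_type)
    if apply_fn is not None:
        apply_fn(**kwargs)


_apply_liger_kernel("smollm3", rms_norm=True, swiglu=True)
```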
liger_kernel-0.6.1.dist-info/METADATA

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: liger_kernel
-Version: 0.6.0
+Version: 0.6.1
 Summary: Efficient Triton kernels for LLM Training
 License: BSD 2-CLAUSE LICENSE
 Copyright 2024 LinkedIn Corporation
@@ -84,7 +84,7 @@ Dynamic: requires-dist
 </td>
 <td style="padding: 10px;">
 <a href="https://discord.gg/gpumode">
-<img src="https://dcbadge.vercel.app/api/server/gpumode?style=flat" alt="Join Our Discord">
+<img src="https://dcbadge.limes.pink/api/server/gpumode?style=flat" alt="Join Our Discord">
 </a>
 </td>
 </tr>
@@ -307,7 +307,7 @@ loss.backward()
 | Qwen2-VL, & QVQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_vl` | RMSNorm, LayerNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
 | Qwen2.5-VL | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_5_vl` | RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
 | Qwen3 | `liger_kernel.transformers.apply_liger_kernel_to_qwen3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
-| Qwen3 MoE | `
+| Qwen3 MoE | `liger_kernel.transformers.apply_liger_kernel_to_qwen3_moe` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
 | Phi3 & Phi3.5 | `liger_kernel.transformers.apply_liger_kernel_to_phi3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
 | Granite 3.0 & 3.1 | `liger_kernel.transformers.apply_liger_kernel_to_granite` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss |
 | OLMo2 | `liger_kernel.transformers.apply_liger_kernel_to_olmo2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
@@ -414,21 +414,19 @@ loss.backward()
 
 - For issues, create a Github ticket in this repository
 - For open discussion, join [our discord channel on GPUMode](https://discord.com/channels/1189498204333543425/1275130785933951039)
-- For formal collaboration, send an email to yannchen@linkedin.com and
+- For formal collaboration, send an email to Yanning Chen(yannchen@linkedin.com) and Zhipeng Wang(zhipwang@linkedin.com)
 
 ## Cite this work
 
 Biblatex entry:
 ```bib
-@
-
-
-
-
-
-
-      url={https://arxiv.org/abs/2410.10989},
-      journal={arXiv preprint arXiv:2410.10989},
+@inproceedings{
+hsu2025ligerkernel,
+title={Liger-Kernel: Efficient Triton Kernels for {LLM} Training},
+author={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen and Zhipeng Wang},
+booktitle={Championing Open-source DEvelopment in ML Workshop @ ICML25},
+year={2025},
+url={https://openreview.net/forum?id=36SjAIT42G}
 }
 ```
````
liger_kernel-0.6.1.dist-info/RECORD

```diff
@@ -19,6 +19,7 @@ liger_kernel/chunked_loss/simpo_loss.py,sha256=fy2w8KbhMrBv7b1jdIeH3bBFxY52bPQPZ
 liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 liger_kernel/ops/cross_entropy.py,sha256=e8THGnhOcy_0SbOLABx67HEM7-B8a8pG7nDKbCRpQKM,19123
 liger_kernel/ops/dyt.py,sha256=gCLz4S8aul8SY9nvIGaoK67aGb7U9MJRQdo3ONqmQYs,5417
+liger_kernel/ops/fused_add_rms_norm.py,sha256=UBqmlqFCmhSAIpkNKd8rrfXatX7Z4J9bp2dX9A0lrJQ,14017
 liger_kernel/ops/fused_linear_cross_entropy.py,sha256=5fbGhN85n3zf0uIdJ7PYHWIRzTf0VTFiS0ARtOmqIP0,11020
 liger_kernel/ops/fused_linear_jsd.py,sha256=CSoprxb-YcJy-YUKiTcYkxN8sb9h2kdk_iHuncvSV5c,9683
 liger_kernel/ops/fused_neighborhood_attention.py,sha256=vPi5xbnh6wxyZehaqo6Tuilqo2fN5SGDiONjnNmIKqs,35556
@@ -27,11 +28,11 @@ liger_kernel/ops/group_norm.py,sha256=qD4D4lSjSgVtO52EBNLC2iTseALRgPgqXE50U2wogg
 liger_kernel/ops/grpo_loss.py,sha256=anRnv7k1-AV3pCC6_TqP0GMg78YYUfRAJrbpx6PVhl0,9448
 liger_kernel/ops/jsd.py,sha256=onHp5T3MbvJaVz5Vup7Ww6EQp_HTaZeayTjJk6FgQMY,7042
 liger_kernel/ops/kl_div.py,sha256=ZjGdDLKWksHT9dZ0xF_TDgAkj5cuMTwwT5tr9E-_24o,8734
-liger_kernel/ops/layer_norm.py,sha256=
+liger_kernel/ops/layer_norm.py,sha256=BHPDuaogMTfIJkBJdqLZbOQouNWTf3fJVyOQOD7blCE,9901
 liger_kernel/ops/multi_token_attention.py,sha256=Oz_RXDp-OSS_R_HuGmaETHdAJ7Toda_70OfE7TXMUlY,7645
 liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
-liger_kernel/ops/rms_norm.py,sha256
-liger_kernel/ops/rope.py,sha256=
+liger_kernel/ops/rms_norm.py,sha256=DtvsWN5YktFAoc0JYSAwVeoZfryBFJlX-ipU7ooP01A,18891
+liger_kernel/ops/rope.py,sha256=v-7JHRrv-5ImoROkpKfl30WwWI4qTa2tAl7zQeB4ml4,8956
 liger_kernel/ops/softmax.py,sha256=tgORx6MK1IDDtZKqGarj0IPIVjqAIEUXXYPiinhRdtI,5864
 liger_kernel/ops/sparsemax.py,sha256=AeWe1xgkHJFEKWTj2vu_0hj7LztGvjqXAps-QTpCY0U,5087
 liger_kernel/ops/swiglu.py,sha256=D7nd4u_LInwsIRNCDdY77lqnTz8-W5dJrpEAt8zEO_A,3033
@@ -39,12 +40,13 @@ liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
 liger_kernel/ops/utils.py,sha256=uoFKQqo-34N2TWQNvXMFywqGiOMMXNEVBxVojzlUAa0,3836
 liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
 liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
-liger_kernel/transformers/__init__.py,sha256=
+liger_kernel/transformers/__init__.py,sha256=VoHQp5emsAJAouql37RuvtGFeZCoMIHgoIxfsyYMTc8,7564
 liger_kernel/transformers/auto_model.py,sha256=0qCTRZt280Bj_LcFdzo9hlaR-BWNazawXOGgoCZjgEg,1545
 liger_kernel/transformers/cross_entropy.py,sha256=z3KTWQnFxr_IZaVjtYt0ZNEWQdDdYThN35xWkHlDGH0,1683
 liger_kernel/transformers/dyt.py,sha256=i-4GPaMrl-jab9TVI5qN0-H9qycn_mCbV82ozU4nbmU,723
 liger_kernel/transformers/fsdp.py,sha256=CUiyjTmjkjY7pLXQv8ly9rnzgXw6529csd9pvtJNMYc,3096
-liger_kernel/transformers/functional.py,sha256=
+liger_kernel/transformers/functional.py,sha256=PXnACWD7kzgge50RdOUuvtmOTS7DVkkrL7mm0cX5bOc,7734
+liger_kernel/transformers/fused_add_rms_norm.py,sha256=7_Bzg-x6lLe6W1qG2DtjDALhEpNZlC6N5GppEs9cTYY,1199
 liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=O8Sg5BT81nTaY9fSGoOY9dOD9ekibwwiuXhdUHaxntQ,1742
 liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
 liger_kernel/transformers/fused_neighborhood_attention.py,sha256=TxYDUAt9B6WSP14aJP66C_2Mbds2sSIPGnamhUSTrC8,7957
@@ -54,7 +56,7 @@ liger_kernel/transformers/grpo_loss.py,sha256=uAkUNKSnUGEOqa82L9w2e6AI1kcmG8K45-
 liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
 liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
 liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
-liger_kernel/transformers/monkey_patch.py,sha256=
+liger_kernel/transformers/monkey_patch.py,sha256=tXKo4EKVp3szpdqPh051oLZFrlg_hCbWRv0RpSX_kfY,89238
 liger_kernel/transformers/multi_token_attention.py,sha256=l9VDICK0dfmifUDW668hGscP8AHq2rYcM2oGUa3baRQ,1751
 liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
 liger_kernel/transformers/rms_norm.py,sha256=vkekcvTeWY8vL4H6hg3t0XeY0Ew_3OFMPHuzqlxPPVw,2719
@@ -68,7 +70,7 @@ liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1w
 liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 liger_kernel/transformers/model/gemma.py,sha256=mNX-mIwV6jI4zfbrUHp0C468pOmjzsL7mjXipGt-eS0,10007
 liger_kernel/transformers/model/gemma2.py,sha256=R_JFPyWTk7RyA7D05ZiIaNO5pX8gWcvfWf-6rdCRMxs,11296
-liger_kernel/transformers/model/gemma3.py,sha256=
+liger_kernel/transformers/model/gemma3.py,sha256=FKO4j3t4W_5uECRA1lhVnXC-It2GhirHm4tpCf9ApAc,12785
 liger_kernel/transformers/model/glm4.py,sha256=GlnEhdGJuDIqp2R9qC54biY3HwV1tWmfpJm6ijoAsrM,5257
 liger_kernel/transformers/model/llama.py,sha256=i8jJgyZsMKWQ-zKloETLugtwFpUOdaWxLDceciFXKd4,12832
 liger_kernel/transformers/model/llama4.py,sha256=IgbB8sTh3dlETQnaNNy1bZLuXy-Nt7qmeAjF27ydGpg,4210
@@ -85,13 +87,14 @@ liger_kernel/transformers/model/qwen2_5_vl.py,sha256=zEVVwotCXnAm3RRc8-1Nc8uitSW
 liger_kernel/transformers/model/qwen2_vl.py,sha256=5vK-vtCDpKZ2w33xYp2BS8kQYWUbKMqaiKvQcI27Mss,5884
 liger_kernel/transformers/model/qwen3.py,sha256=w2jBHuK9kK9EmOr5dnEIXNQXUgUSV_sJUkXSEwxLPHs,4885
 liger_kernel/transformers/model/qwen3_moe.py,sha256=BkpfFH3fOH0yRfA7LF-AoHTLut2GV0Y4MOlkiIYewfU,5511
+liger_kernel/transformers/model/smollm3.py,sha256=mqayvpwpMbp2yd_Ue7IPzy-dA4KHSDi_ROZW5vHCHfQ,7596
 liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
 liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
 liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
 liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
-liger_kernel-0.6.
-liger_kernel-0.6.
-liger_kernel-0.6.
-liger_kernel-0.6.
-liger_kernel-0.6.
-liger_kernel-0.6.
+liger_kernel-0.6.1.dist-info/licenses/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
+liger_kernel-0.6.1.dist-info/licenses/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
+liger_kernel-0.6.1.dist-info/METADATA,sha256=_of0e7dKufrp2upc26bnv4VLBZvAbcdDA8Fssm3mIfk,24545
+liger_kernel-0.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+liger_kernel-0.6.1.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
+liger_kernel-0.6.1.dist-info/RECORD,,
```