liger-kernel 0.6.1__py3-none-any.whl → 0.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- liger_kernel/chunked_loss/dpo_loss.py +54 -3
- liger_kernel/chunked_loss/fused_linear_ppo.py +4 -0
- liger_kernel/chunked_loss/grpo_loss.py +38 -4
- liger_kernel/chunked_loss/jsd_loss.py +5 -2
- liger_kernel/ops/cross_entropy.py +59 -53
- liger_kernel/ops/fused_linear_cross_entropy.py +83 -17
- liger_kernel/ops/layer_norm.py +4 -6
- liger_kernel/ops/llama4_rope.py +225 -0
- liger_kernel/ops/poly_norm.py +386 -0
- liger_kernel/transformers/__init__.py +32 -0
- liger_kernel/transformers/experimental/__init__.py +5 -0
- liger_kernel/transformers/functional.py +9 -0
- liger_kernel/transformers/fused_linear_cross_entropy.py +8 -1
- liger_kernel/transformers/llama4_rope.py +93 -0
- liger_kernel/transformers/model/falcon_h1.py +108 -0
- liger_kernel/transformers/model/gemma.py +2 -1
- liger_kernel/transformers/model/gemma2.py +8 -2
- liger_kernel/transformers/model/gemma3.py +27 -2
- liger_kernel/transformers/model/glm4.py +2 -1
- liger_kernel/transformers/model/glm4v.py +151 -0
- liger_kernel/transformers/model/glm4v_moe.py +153 -0
- liger_kernel/transformers/model/internvl.py +150 -0
- liger_kernel/transformers/model/llama.py +2 -1
- liger_kernel/transformers/model/llama4.py +2 -1
- liger_kernel/transformers/model/llava.py +6 -2
- liger_kernel/transformers/model/loss_utils.py +3 -0
- liger_kernel/transformers/model/mistral.py +2 -1
- liger_kernel/transformers/model/mixtral.py +8 -2
- liger_kernel/transformers/model/mllama.py +6 -3
- liger_kernel/transformers/model/olmo2.py +2 -1
- liger_kernel/transformers/model/paligemma.py +19 -0
- liger_kernel/transformers/model/phi3.py +10 -160
- liger_kernel/transformers/model/qwen2.py +2 -1
- liger_kernel/transformers/model/qwen2_5_vl.py +7 -2
- liger_kernel/transformers/model/qwen2_vl.py +7 -2
- liger_kernel/transformers/model/qwen3.py +2 -1
- liger_kernel/transformers/model/qwen3_moe.py +8 -2
- liger_kernel/transformers/model/qwen3_next.py +134 -0
- liger_kernel/transformers/model/smollm3.py +2 -1
- liger_kernel/transformers/model/smolvlm.py +158 -0
- liger_kernel/transformers/monkey_patch.py +552 -23
- liger_kernel/transformers/multi_token_attention.py +1 -1
- liger_kernel/transformers/poly_norm.py +42 -0
- liger_kernel/transformers/rms_norm.py +7 -0
- {liger_kernel-0.6.1.dist-info → liger_kernel-0.6.3.dist-info}/METADATA +14 -11
- {liger_kernel-0.6.1.dist-info → liger_kernel-0.6.3.dist-info}/RECORD +50 -39
- {liger_kernel-0.6.1.dist-info → liger_kernel-0.6.3.dist-info}/WHEEL +0 -0
- {liger_kernel-0.6.1.dist-info → liger_kernel-0.6.3.dist-info}/licenses/LICENSE +0 -0
- {liger_kernel-0.6.1.dist-info → liger_kernel-0.6.3.dist-info}/licenses/NOTICE +0 -0
- {liger_kernel-0.6.1.dist-info → liger_kernel-0.6.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import torch.nn as nn
|
|
3
|
+
|
|
4
|
+
from liger_kernel.ops.poly_norm import LigerPolyNormFunction
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class LigerPolyNorm(nn.Module):
|
|
8
|
+
"""
|
|
9
|
+
PolyNorm layer wrapper for Liger kernel.
|
|
10
|
+
|
|
11
|
+
PolyNorm formula:
|
|
12
|
+
y = w₀·norm(x³) + w₁·norm(x²) + w₂·norm(x) + b
|
|
13
|
+
where norm(u) = u / sqrt(mean(u²) + ε)
|
|
14
|
+
|
|
15
|
+
Reference:
|
|
16
|
+
https://github.com/BryceZhuo/PolyCom/
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
eps: epsilon for numerical stability (default: 1e-6)
|
|
20
|
+
in_place: whether to in-place modify grad_output in backward to save memory (default: True).
|
|
21
|
+
Set to False if grad_output is still needed elsewhere after backward.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
def __init__(self, eps=1e-6, in_place=True):
|
|
25
|
+
super().__init__()
|
|
26
|
+
# Align with PolyCom reference: initialize weights to (1/3, 1/3, 1/3) and bias to 1.0
|
|
27
|
+
self.weight = nn.Parameter(torch.full((3,), 1.0 / 3.0))
|
|
28
|
+
self.bias = nn.Parameter(torch.tensor(1.0))
|
|
29
|
+
self.variance_epsilon = eps
|
|
30
|
+
self.in_place = in_place
|
|
31
|
+
|
|
32
|
+
def forward(self, hidden_states):
|
|
33
|
+
return LigerPolyNormFunction.apply(
|
|
34
|
+
hidden_states,
|
|
35
|
+
self.weight,
|
|
36
|
+
self.bias,
|
|
37
|
+
self.variance_epsilon,
|
|
38
|
+
self.in_place,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
def extra_repr(self):
|
|
42
|
+
return f"weight_shape={tuple(self.weight.shape)}, eps={self.variance_epsilon}, in_place={self.in_place}"
|
|
@@ -77,3 +77,10 @@ class LigerRMSNormForGlm4(LigerRMSNorm):
|
|
|
77
77
|
self, hidden_size, eps=1e-6, offset=0.0, casting_mode="llama", init_fn="ones", in_place=False, row_mode=None
|
|
78
78
|
):
|
|
79
79
|
super().__init__(hidden_size, eps, offset, casting_mode, init_fn, in_place, row_mode)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class LigerRMSNormForQwen3Next(LigerRMSNorm):
|
|
83
|
+
def __init__(
|
|
84
|
+
self, hidden_size, eps=1e-6, offset=1.0, casting_mode="gemma", init_fn="zeros", in_place=False, row_mode=None
|
|
85
|
+
):
|
|
86
|
+
super().__init__(hidden_size, eps, offset, casting_mode, init_fn, in_place, row_mode)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: liger_kernel
|
|
3
|
-
Version: 0.6.1
|
|
3
|
+
Version: 0.6.3
|
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
|
@@ -35,15 +35,14 @@ Requires-Dist: triton>=2.3.1
|
|
|
35
35
|
Provides-Extra: dev
|
|
36
36
|
Requires-Dist: transformers>=4.49.0; extra == "dev"
|
|
37
37
|
Requires-Dist: matplotlib>=3.7.2; extra == "dev"
|
|
38
|
-
Requires-Dist:
|
|
39
|
-
Requires-Dist: black>=24.4.2; extra == "dev"
|
|
40
|
-
Requires-Dist: isort>=5.13.2; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.12.0; extra == "dev"
|
|
41
39
|
Requires-Dist: pytest>=7.1.2; extra == "dev"
|
|
42
40
|
Requires-Dist: pytest-xdist; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
42
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
43
43
|
Requires-Dist: pytest-rerunfailures; extra == "dev"
|
|
44
44
|
Requires-Dist: datasets>=2.19.2; extra == "dev"
|
|
45
45
|
Requires-Dist: seaborn; extra == "dev"
|
|
46
|
-
Requires-Dist: mkdocs; extra == "dev"
|
|
47
46
|
Requires-Dist: mkdocs-material; extra == "dev"
|
|
48
47
|
Requires-Dist: torchvision>=0.20; extra == "dev"
|
|
49
48
|
Dynamic: license-file
|
|
@@ -181,8 +180,8 @@ y = orpo_loss(lm_head.weight, x, target)
|
|
|
181
180
|
- `triton >= 3.0.0` Install from pypi. (e.g. `pip install triton==3.0.0`)
|
|
182
181
|
|
|
183
182
|
```bash
|
|
184
|
-
|
|
185
|
-
|
|
183
|
+
pip install -e .[dev]
|
|
184
|
+
pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.3/
|
|
186
185
|
```
|
|
187
186
|
|
|
188
187
|
### Optional Dependencies
|
|
@@ -216,6 +215,9 @@ pip install -e .
|
|
|
216
215
|
|
|
217
216
|
# Setup Development Dependencies
|
|
218
217
|
pip install -e ".[dev]"
|
|
218
|
+
|
|
219
|
+
# NOTE -> For AMD users only
|
|
220
|
+
pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.3/
|
|
219
221
|
```
|
|
220
222
|
|
|
221
223
|
|
|
@@ -312,6 +314,7 @@ loss.backward()
|
|
|
312
314
|
| Granite 3.0 & 3.1 | `liger_kernel.transformers.apply_liger_kernel_to_granite` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss |
|
|
313
315
|
| OLMo2 | `liger_kernel.transformers.apply_liger_kernel_to_olmo2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
314
316
|
| GLM-4 | `liger_kernel.transformers.apply_liger_kernel_to_glm4` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
317
|
+
| InternVL3 | `liger_kernel.transformers.apply_liger_kernel_to_internvl` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
315
318
|
|
|
316
319
|
|
|
317
320
|
## Low-level APIs
|
|
@@ -391,17 +394,17 @@ loss.backward()
|
|
|
391
394
|
<td style="padding: 10px;">
|
|
392
395
|
<div style="display: block;">
|
|
393
396
|
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml">
|
|
394
|
-
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml/badge.svg?event=
|
|
397
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml/badge.svg?branch=main&event=push" alt="Build">
|
|
395
398
|
</a>
|
|
396
399
|
</div>
|
|
397
400
|
<div style="display: block;">
|
|
398
401
|
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
|
|
399
|
-
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?event=
|
|
402
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?branch=main&event=push" alt="Build">
|
|
400
403
|
</a>
|
|
401
404
|
</div>
|
|
402
405
|
<div style="display: block;">
|
|
403
|
-
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/
|
|
404
|
-
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?event=
|
|
406
|
+
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml">
|
|
407
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?branch=main&event=push" alt="Build">
|
|
405
408
|
</a>
|
|
406
409
|
</div>
|
|
407
410
|
</td>
|
|
@@ -5,22 +5,22 @@ liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EB
|
|
|
5
5
|
liger_kernel/chunked_loss/__init__.py,sha256=J5_jNnzZ4gZmA38W5f_4oab7xMoNk1Xy-yh3X_Xlf-s,714
|
|
6
6
|
liger_kernel/chunked_loss/cosine_similarity_loss.py,sha256=pZ07OQ6RI-c8uk96tDRlUXdt31-da7yWhfwircZlKRw,4198
|
|
7
7
|
liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
|
|
8
|
-
liger_kernel/chunked_loss/dpo_loss.py,sha256=
|
|
8
|
+
liger_kernel/chunked_loss/dpo_loss.py,sha256=I83khNs3QQjuhr8U3NIOAACkbse6DNiBV-TulPZ0lXw,9006
|
|
9
9
|
liger_kernel/chunked_loss/functional.py,sha256=-XPDbLml9dHmvoSU2VNTUrBDFehuzvuAGPikVetBMtI,1132
|
|
10
10
|
liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=ooR-qnZCyWJN935oHCSWLaKKKyaYERyhNczRGi1VOiw,11935
|
|
11
|
-
liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=
|
|
11
|
+
liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=ZjpNP5VC-tXXIKb4AckkQ3iWWQeej-JoG4StJq3N0wg,13650
|
|
12
12
|
liger_kernel/chunked_loss/fused_linear_preference.py,sha256=FIH85uUXAOgYx5Ax8MjFhJHVu-2pKtY7wSegd0zSyyY,18336
|
|
13
13
|
liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=RiuK3UtRwH9T6jZ36sA8Urj-TVuOLOO2syLg_JOQapY,13437
|
|
14
|
-
liger_kernel/chunked_loss/grpo_loss.py,sha256=
|
|
15
|
-
liger_kernel/chunked_loss/jsd_loss.py,sha256=
|
|
14
|
+
liger_kernel/chunked_loss/grpo_loss.py,sha256=SkZuKoW8K94UbWR-OtfopsQkuQ8tFOr_90AGR6_Mhes,12844
|
|
15
|
+
liger_kernel/chunked_loss/jsd_loss.py,sha256=gRhnmB8xwuz7FcMJi5v5eyBsq01owaCbcyyrF4rYtY0,7133
|
|
16
16
|
liger_kernel/chunked_loss/kto_loss.py,sha256=llVCe6DkcpCo57seGWoMikaQVFApx764jsmSbQyqwQY,7529
|
|
17
17
|
liger_kernel/chunked_loss/orpo_loss.py,sha256=nu9UYG16dcMw93lvHi4_hYs3Q0FK1KnlmMRj7OpYU8s,4872
|
|
18
18
|
liger_kernel/chunked_loss/simpo_loss.py,sha256=fy2w8KbhMrBv7b1jdIeH3bBFxY52bPQPZb3KwBvmurM,5385
|
|
19
19
|
liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
-
liger_kernel/ops/cross_entropy.py,sha256=
|
|
20
|
+
liger_kernel/ops/cross_entropy.py,sha256=CEgAeX97ezIBRhK3dPQRKsEQiwgnBDOewtDoqKXzw_Q,19605
|
|
21
21
|
liger_kernel/ops/dyt.py,sha256=gCLz4S8aul8SY9nvIGaoK67aGb7U9MJRQdo3ONqmQYs,5417
|
|
22
22
|
liger_kernel/ops/fused_add_rms_norm.py,sha256=UBqmlqFCmhSAIpkNKd8rrfXatX7Z4J9bp2dX9A0lrJQ,14017
|
|
23
|
-
liger_kernel/ops/fused_linear_cross_entropy.py,sha256=
|
|
23
|
+
liger_kernel/ops/fused_linear_cross_entropy.py,sha256=rL6PyM4_9CLj7OL6qHa_ssFdJn0JEZlE12znF7T5cvM,14521
|
|
24
24
|
liger_kernel/ops/fused_linear_jsd.py,sha256=CSoprxb-YcJy-YUKiTcYkxN8sb9h2kdk_iHuncvSV5c,9683
|
|
25
25
|
liger_kernel/ops/fused_neighborhood_attention.py,sha256=vPi5xbnh6wxyZehaqo6Tuilqo2fN5SGDiONjnNmIKqs,35556
|
|
26
26
|
liger_kernel/ops/geglu.py,sha256=r0WSq9E93zzynL44Wh8femzOWK07_SseBM_pJUyxT3s,4144
|
|
@@ -28,8 +28,10 @@ liger_kernel/ops/group_norm.py,sha256=qD4D4lSjSgVtO52EBNLC2iTseALRgPgqXE50U2wogg
|
|
|
28
28
|
liger_kernel/ops/grpo_loss.py,sha256=anRnv7k1-AV3pCC6_TqP0GMg78YYUfRAJrbpx6PVhl0,9448
|
|
29
29
|
liger_kernel/ops/jsd.py,sha256=onHp5T3MbvJaVz5Vup7Ww6EQp_HTaZeayTjJk6FgQMY,7042
|
|
30
30
|
liger_kernel/ops/kl_div.py,sha256=ZjGdDLKWksHT9dZ0xF_TDgAkj5cuMTwwT5tr9E-_24o,8734
|
|
31
|
-
liger_kernel/ops/layer_norm.py,sha256=
|
|
31
|
+
liger_kernel/ops/layer_norm.py,sha256=WmiORsIyufOhazmYZTPjeSc5Z-xTAYwXAKqUcCv_dlY,9807
|
|
32
|
+
liger_kernel/ops/llama4_rope.py,sha256=-aqdZzllklTN8b9--e-TsWY_ntGCN8-tyseT4x0bd8s,8223
|
|
32
33
|
liger_kernel/ops/multi_token_attention.py,sha256=Oz_RXDp-OSS_R_HuGmaETHdAJ7Toda_70OfE7TXMUlY,7645
|
|
34
|
+
liger_kernel/ops/poly_norm.py,sha256=MLgI8Ea93fugKibHCUauQ2ASYVXCvpPZe5v3kQZU6po,11152
|
|
33
35
|
liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
|
|
34
36
|
liger_kernel/ops/rms_norm.py,sha256=DtvsWN5YktFAoc0JYSAwVeoZfryBFJlX-ipU7ooP01A,18891
|
|
35
37
|
liger_kernel/ops/rope.py,sha256=v-7JHRrv-5ImoROkpKfl30WwWI4qTa2tAl7zQeB4ml4,8956
|
|
@@ -40,14 +42,14 @@ liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
|
|
|
40
42
|
liger_kernel/ops/utils.py,sha256=uoFKQqo-34N2TWQNvXMFywqGiOMMXNEVBxVojzlUAa0,3836
|
|
41
43
|
liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
|
|
42
44
|
liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
|
|
43
|
-
liger_kernel/transformers/__init__.py,sha256=
|
|
45
|
+
liger_kernel/transformers/__init__.py,sha256=MAAd-YqPdG-j_sbrIE43nrICpA4xTg-dx6M06KWLMFU,9486
|
|
44
46
|
liger_kernel/transformers/auto_model.py,sha256=0qCTRZt280Bj_LcFdzo9hlaR-BWNazawXOGgoCZjgEg,1545
|
|
45
47
|
liger_kernel/transformers/cross_entropy.py,sha256=z3KTWQnFxr_IZaVjtYt0ZNEWQdDdYThN35xWkHlDGH0,1683
|
|
46
48
|
liger_kernel/transformers/dyt.py,sha256=i-4GPaMrl-jab9TVI5qN0-H9qycn_mCbV82ozU4nbmU,723
|
|
47
49
|
liger_kernel/transformers/fsdp.py,sha256=CUiyjTmjkjY7pLXQv8ly9rnzgXw6529csd9pvtJNMYc,3096
|
|
48
|
-
liger_kernel/transformers/functional.py,sha256=
|
|
50
|
+
liger_kernel/transformers/functional.py,sha256=a8EGYjHDg34rhnaD4JpU8I20XJ7xiqJvqqjoh4NcwYk,8022
|
|
49
51
|
liger_kernel/transformers/fused_add_rms_norm.py,sha256=7_Bzg-x6lLe6W1qG2DtjDALhEpNZlC6N5GppEs9cTYY,1199
|
|
50
|
-
liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=
|
|
52
|
+
liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=toa54dpmJduoZLhU3lJA-HPZ03MYcMKekDWPcdYjvYA,2020
|
|
51
53
|
liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
|
|
52
54
|
liger_kernel/transformers/fused_neighborhood_attention.py,sha256=TxYDUAt9B6WSP14aJP66C_2Mbds2sSIPGnamhUSTrC8,7957
|
|
53
55
|
liger_kernel/transformers/geglu.py,sha256=mrgqzIUVd6lN7fkDKLkw5YaESDxDtFgbot430WwPVOQ,1107
|
|
@@ -56,45 +58,54 @@ liger_kernel/transformers/grpo_loss.py,sha256=uAkUNKSnUGEOqa82L9w2e6AI1kcmG8K45-
|
|
|
56
58
|
liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
|
|
57
59
|
liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
|
|
58
60
|
liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
|
|
59
|
-
liger_kernel/transformers/
|
|
60
|
-
liger_kernel/transformers/
|
|
61
|
+
liger_kernel/transformers/llama4_rope.py,sha256=kS6PSHEwf3dS7hD7C7p8S0geugx2EMCiP0h0F7LsUoY,3639
|
|
62
|
+
liger_kernel/transformers/monkey_patch.py,sha256=NWinrSt9_h4aF2Uax8jZ3of_z1LGmJY_yW9fW6EDieU,115774
|
|
63
|
+
liger_kernel/transformers/multi_token_attention.py,sha256=K3NIY9_5TPgZ4_Rahn0xnkMXxD_fmlJHK4CWGYvGQp0,1752
|
|
64
|
+
liger_kernel/transformers/poly_norm.py,sha256=g5tC75i3qy1_N26ZUP-jfpct7ivQAEdJfIfx8IXzeyE,1377
|
|
61
65
|
liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
|
|
62
|
-
liger_kernel/transformers/rms_norm.py,sha256=
|
|
66
|
+
liger_kernel/transformers/rms_norm.py,sha256=HwddVqrqS58jE-M2_4NkFGARtCDBhGnkKyjBN9b3FYI,3004
|
|
63
67
|
liger_kernel/transformers/rope.py,sha256=ZTrTORSAyfcFIKjk6XEeYmk4ROH7xXED9L4g2NFntlE,999
|
|
64
68
|
liger_kernel/transformers/softmax.py,sha256=yadlAgE4V2JByMwrDDa2s5SUBp8Jgd57xwnVvAWoBaI,264
|
|
65
69
|
liger_kernel/transformers/sparsemax.py,sha256=0lQA0UEOs4mu8CMruZ3VLhImxQVXJWhPsAKUsYA7vj8,403
|
|
66
70
|
liger_kernel/transformers/swiglu.py,sha256=LZ8YeLIdv2k46JleZMjzubGk98smt6t780kSgcVLsQk,3454
|
|
67
71
|
liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
|
|
68
72
|
liger_kernel/transformers/tvd.py,sha256=XrRfyJIqN6HFxXk8MYyFVZM1OLz3mtSbRZvWfZ_JerQ,450
|
|
73
|
+
liger_kernel/transformers/experimental/__init__.py,sha256=oQqk-f32JYgWEP9DJCj6ty6bbJSGrdXsFDQFwGeX6vI,127
|
|
69
74
|
liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1wcRgDSyjBMv5i1a7BrDPDQw,881
|
|
70
75
|
liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
|
-
liger_kernel/transformers/model/
|
|
72
|
-
liger_kernel/transformers/model/
|
|
73
|
-
liger_kernel/transformers/model/
|
|
74
|
-
liger_kernel/transformers/model/
|
|
75
|
-
liger_kernel/transformers/model/
|
|
76
|
-
liger_kernel/transformers/model/
|
|
77
|
-
liger_kernel/transformers/model/
|
|
78
|
-
liger_kernel/transformers/model/
|
|
79
|
-
liger_kernel/transformers/model/
|
|
80
|
-
liger_kernel/transformers/model/
|
|
81
|
-
liger_kernel/transformers/model/
|
|
82
|
-
liger_kernel/transformers/model/
|
|
83
|
-
liger_kernel/transformers/model/
|
|
84
|
-
liger_kernel/transformers/model/
|
|
85
|
-
liger_kernel/transformers/model/
|
|
86
|
-
liger_kernel/transformers/model/
|
|
87
|
-
liger_kernel/transformers/model/
|
|
88
|
-
liger_kernel/transformers/model/
|
|
89
|
-
liger_kernel/transformers/model/
|
|
90
|
-
liger_kernel/transformers/model/
|
|
76
|
+
liger_kernel/transformers/model/falcon_h1.py,sha256=DTzfT-5OzQ6I-pU80Vn5e5ibd1EOEbJV5cMTJFhfwFg,4302
|
|
77
|
+
liger_kernel/transformers/model/gemma.py,sha256=WryzpVmCm2H_XgLKNu3jJ6gVawjQDjapTetg4WHlbR4,10078
|
|
78
|
+
liger_kernel/transformers/model/gemma2.py,sha256=eOQEfJBKezJNNrirhkPSagGxr9qj_y4lENOZgjUZKpE,11471
|
|
79
|
+
liger_kernel/transformers/model/gemma3.py,sha256=-tvZw88S-STqmvdim-xrZZRJ17KLWoge_73ilIvhpIU,14157
|
|
80
|
+
liger_kernel/transformers/model/glm4.py,sha256=2TBM5-4URpj6uX96G1AZ_DrjAmQtgLwXGzBvaXtfwdk,5328
|
|
81
|
+
liger_kernel/transformers/model/glm4v.py,sha256=nlgEMOBjFEOu7a-cwwp9mWhTFqIs3QrOvcxW-uaPq-s,6022
|
|
82
|
+
liger_kernel/transformers/model/glm4v_moe.py,sha256=q3-R_FoQPayS85AriJWWebblXB6Ix9fvxhSrI3mHiz4,6237
|
|
83
|
+
liger_kernel/transformers/model/internvl.py,sha256=Uv8KGXOz9NhiKVZDeRNzAJH5kRuMZikUbswWM9u5KM0,6069
|
|
84
|
+
liger_kernel/transformers/model/llama.py,sha256=L_VuaxxFJpzEmpLnaqwBbI5-Q14Qgfj-ufhLydCWgdk,12903
|
|
85
|
+
liger_kernel/transformers/model/llama4.py,sha256=epEO_VD1gJCDovabSIQLxxncoh-TQTBfj-UgIlR5c7U,4281
|
|
86
|
+
liger_kernel/transformers/model/llava.py,sha256=t6kMiyBkteVam-ltiod2f1mevj8l8ZHxYDvfu9C_lEk,15196
|
|
87
|
+
liger_kernel/transformers/model/loss_utils.py,sha256=02RVkPI7Qs4ZP4yU_udCAvD_2hgIaHmxremRKe3N7EE,1885
|
|
88
|
+
liger_kernel/transformers/model/mistral.py,sha256=XmM4N21RIOkJ9PJ4PZ3DcRUhGUczn_lbx0plf1zeHb0,5571
|
|
89
|
+
liger_kernel/transformers/model/mixtral.py,sha256=SLdLO81AZL7zror0LXLkn2PHqKzjwMMs4kALNqoaT00,11571
|
|
90
|
+
liger_kernel/transformers/model/mllama.py,sha256=5q8q2BxQR_8hNZ83XrJIbndw-l6T7ZyFLM7OCv_uPK0,11593
|
|
91
|
+
liger_kernel/transformers/model/olmo2.py,sha256=9O1Cze2B6ON-i1jgjQwjpS_WsDEK0PzL003s-MkevWA,5330
|
|
92
|
+
liger_kernel/transformers/model/paligemma.py,sha256=mnTnSmEDla_bbVmPFmqhNVT__Cuf-TM-KLGFUa1sU-4,19967
|
|
93
|
+
liger_kernel/transformers/model/phi3.py,sha256=L4gG8htOABmaxzcmHph0bBFCACRvL9r6wuDVFXi2o7Q,4117
|
|
94
|
+
liger_kernel/transformers/model/qwen2.py,sha256=lgn0X6EzAZUhOv17ZDD9choIDdaPVIAsIrrdvwzWXqs,10033
|
|
95
|
+
liger_kernel/transformers/model/qwen2_5_vl.py,sha256=Ea3zvL1FJfjlaerpeXCq-1zmorrajwNsR-XsgWr4fFQ,6465
|
|
96
|
+
liger_kernel/transformers/model/qwen2_vl.py,sha256=ZeasFPGs-bxm2Y_E15mo0YNx5wwtKYDV-bjVKjkLPBk,6018
|
|
97
|
+
liger_kernel/transformers/model/qwen3.py,sha256=Q2aOg5erPrgVgRcqJm8sefLSDtvU1AD5B7aJnP7mRMM,4956
|
|
98
|
+
liger_kernel/transformers/model/qwen3_moe.py,sha256=1CwTMCNFDYsjGoa_aHFBagtC5HuJTV-s0__5UvcjD3A,5686
|
|
99
|
+
liger_kernel/transformers/model/qwen3_next.py,sha256=7To7azriAogxeE7oEvByKztH9154dnDiDVNHHm7PZK4,5632
|
|
100
|
+
liger_kernel/transformers/model/smollm3.py,sha256=0KWVkDtXbjsBKhJnaquV6vUUYyLtfmNwYH0sxJt-qTk,7667
|
|
101
|
+
liger_kernel/transformers/model/smolvlm.py,sha256=yFpPKawLVo3zXzLjM7Y_T8FyRrPxVyp-YPFMM8m3k0c,6734
|
|
91
102
|
liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
|
|
92
103
|
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
|
|
93
104
|
liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
|
|
94
105
|
liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
|
|
95
|
-
liger_kernel-0.6.
|
|
96
|
-
liger_kernel-0.6.
|
|
97
|
-
liger_kernel-0.6.
|
|
98
|
-
liger_kernel-0.6.
|
|
99
|
-
liger_kernel-0.6.
|
|
100
|
-
liger_kernel-0.6.
|
|
106
|
+
liger_kernel-0.6.3.dist-info/licenses/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
107
|
+
liger_kernel-0.6.3.dist-info/licenses/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
108
|
+
liger_kernel-0.6.3.dist-info/METADATA,sha256=n9tHig7KRoszPUoLj3yvGp89iubHb2wDwXAsrg7XPFo,24820
|
|
109
|
+
liger_kernel-0.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
110
|
+
liger_kernel-0.6.3.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
111
|
+
liger_kernel-0.6.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|