liger-kernel 0.6.1__py3-none-any.whl → 0.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. liger_kernel/chunked_loss/dpo_loss.py +54 -3
  2. liger_kernel/chunked_loss/fused_linear_ppo.py +4 -0
  3. liger_kernel/chunked_loss/grpo_loss.py +38 -4
  4. liger_kernel/chunked_loss/jsd_loss.py +5 -2
  5. liger_kernel/ops/cross_entropy.py +59 -53
  6. liger_kernel/ops/fused_linear_cross_entropy.py +83 -17
  7. liger_kernel/ops/layer_norm.py +4 -6
  8. liger_kernel/ops/llama4_rope.py +225 -0
  9. liger_kernel/ops/poly_norm.py +386 -0
  10. liger_kernel/transformers/__init__.py +32 -0
  11. liger_kernel/transformers/experimental/__init__.py +5 -0
  12. liger_kernel/transformers/functional.py +9 -0
  13. liger_kernel/transformers/fused_linear_cross_entropy.py +8 -1
  14. liger_kernel/transformers/llama4_rope.py +93 -0
  15. liger_kernel/transformers/model/falcon_h1.py +108 -0
  16. liger_kernel/transformers/model/gemma.py +2 -1
  17. liger_kernel/transformers/model/gemma2.py +8 -2
  18. liger_kernel/transformers/model/gemma3.py +27 -2
  19. liger_kernel/transformers/model/glm4.py +2 -1
  20. liger_kernel/transformers/model/glm4v.py +151 -0
  21. liger_kernel/transformers/model/glm4v_moe.py +153 -0
  22. liger_kernel/transformers/model/internvl.py +150 -0
  23. liger_kernel/transformers/model/llama.py +2 -1
  24. liger_kernel/transformers/model/llama4.py +2 -1
  25. liger_kernel/transformers/model/llava.py +6 -2
  26. liger_kernel/transformers/model/loss_utils.py +3 -0
  27. liger_kernel/transformers/model/mistral.py +2 -1
  28. liger_kernel/transformers/model/mixtral.py +8 -2
  29. liger_kernel/transformers/model/mllama.py +6 -3
  30. liger_kernel/transformers/model/olmo2.py +2 -1
  31. liger_kernel/transformers/model/paligemma.py +19 -0
  32. liger_kernel/transformers/model/phi3.py +10 -160
  33. liger_kernel/transformers/model/qwen2.py +2 -1
  34. liger_kernel/transformers/model/qwen2_5_vl.py +7 -2
  35. liger_kernel/transformers/model/qwen2_vl.py +7 -2
  36. liger_kernel/transformers/model/qwen3.py +2 -1
  37. liger_kernel/transformers/model/qwen3_moe.py +8 -2
  38. liger_kernel/transformers/model/qwen3_next.py +134 -0
  39. liger_kernel/transformers/model/smollm3.py +2 -1
  40. liger_kernel/transformers/model/smolvlm.py +158 -0
  41. liger_kernel/transformers/monkey_patch.py +552 -23
  42. liger_kernel/transformers/multi_token_attention.py +1 -1
  43. liger_kernel/transformers/poly_norm.py +42 -0
  44. liger_kernel/transformers/rms_norm.py +7 -0
  45. {liger_kernel-0.6.1.dist-info → liger_kernel-0.6.3.dist-info}/METADATA +14 -11
  46. {liger_kernel-0.6.1.dist-info → liger_kernel-0.6.3.dist-info}/RECORD +50 -39
  47. {liger_kernel-0.6.1.dist-info → liger_kernel-0.6.3.dist-info}/WHEEL +0 -0
  48. {liger_kernel-0.6.1.dist-info → liger_kernel-0.6.3.dist-info}/licenses/LICENSE +0 -0
  49. {liger_kernel-0.6.1.dist-info → liger_kernel-0.6.3.dist-info}/licenses/NOTICE +0 -0
  50. {liger_kernel-0.6.1.dist-info → liger_kernel-0.6.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,42 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ from liger_kernel.ops.poly_norm import LigerPolyNormFunction
5
+
6
+
7
+ class LigerPolyNorm(nn.Module):
8
+ """
9
+ PolyNorm layer wrapper for Liger kernel.
10
+
11
+ PolyNorm formula:
12
+ y = w₀·norm(x³) + w₁·norm(x²) + w₂·norm(x) + b
13
+ where norm(u) = u / sqrt(mean(u²) + ε)
14
+
15
+ Reference:
16
+ https://github.com/BryceZhuo/PolyCom/
17
+
18
+ Args:
19
+ eps: epsilon for numerical stability (default: 1e-6)
20
+ in_place: whether to in-place modify grad_output in backward to save memory (default: False).
21
+ Set to True to save memory if grad_output is not needed elsewhere.
22
+ """
23
+
24
+ def __init__(self, eps=1e-6, in_place=True):
25
+ super().__init__()
26
+ # Align with PolyCom reference: initialize weights to (1/3, 1/3, 1/3) and bias to 1.0
27
+ self.weight = nn.Parameter(torch.full((3,), 1.0 / 3.0))
28
+ self.bias = nn.Parameter(torch.tensor(1.0))
29
+ self.variance_epsilon = eps
30
+ self.in_place = in_place
31
+
32
+ def forward(self, hidden_states):
33
+ return LigerPolyNormFunction.apply(
34
+ hidden_states,
35
+ self.weight,
36
+ self.bias,
37
+ self.variance_epsilon,
38
+ self.in_place,
39
+ )
40
+
41
+ def extra_repr(self):
42
+ return f"weight_shape={tuple(self.weight.shape)}, eps={self.variance_epsilon}, in_place={self.in_place}"
@@ -77,3 +77,10 @@ class LigerRMSNormForGlm4(LigerRMSNorm):
77
77
  self, hidden_size, eps=1e-6, offset=0.0, casting_mode="llama", init_fn="ones", in_place=False, row_mode=None
78
78
  ):
79
79
  super().__init__(hidden_size, eps, offset, casting_mode, init_fn, in_place, row_mode)
80
+
81
+
82
+ class LigerRMSNormForQwen3Next(LigerRMSNorm):
83
+ def __init__(
84
+ self, hidden_size, eps=1e-6, offset=1.0, casting_mode="gemma", init_fn="zeros", in_place=False, row_mode=None
85
+ ):
86
+ super().__init__(hidden_size, eps, offset, casting_mode, init_fn, in_place, row_mode)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: liger_kernel
3
- Version: 0.6.1
3
+ Version: 0.6.3
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -35,15 +35,14 @@ Requires-Dist: triton>=2.3.1
35
35
  Provides-Extra: dev
36
36
  Requires-Dist: transformers>=4.49.0; extra == "dev"
37
37
  Requires-Dist: matplotlib>=3.7.2; extra == "dev"
38
- Requires-Dist: flake8>=4.0.1.1; extra == "dev"
39
- Requires-Dist: black>=24.4.2; extra == "dev"
40
- Requires-Dist: isort>=5.13.2; extra == "dev"
38
+ Requires-Dist: ruff>=0.12.0; extra == "dev"
41
39
  Requires-Dist: pytest>=7.1.2; extra == "dev"
42
40
  Requires-Dist: pytest-xdist; extra == "dev"
41
+ Requires-Dist: pytest-cov; extra == "dev"
42
+ Requires-Dist: pytest-asyncio; extra == "dev"
43
43
  Requires-Dist: pytest-rerunfailures; extra == "dev"
44
44
  Requires-Dist: datasets>=2.19.2; extra == "dev"
45
45
  Requires-Dist: seaborn; extra == "dev"
46
- Requires-Dist: mkdocs; extra == "dev"
47
46
  Requires-Dist: mkdocs-material; extra == "dev"
48
47
  Requires-Dist: torchvision>=0.20; extra == "dev"
49
48
  Dynamic: license-file
@@ -181,8 +180,8 @@ y = orpo_loss(lm_head.weight, x, target)
181
180
  - `triton >= 3.0.0` Install from pypi. (e.g. `pip install triton==3.0.0`)
182
181
 
183
182
  ```bash
184
- # Need to pass the url when installing
185
- pip install -e .[dev] --extra-index-url https://download.pytorch.org/whl/nightly/rocm6.2
183
+ pip install -e .[dev]
184
+ pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.3/
186
185
  ```
187
186
 
188
187
  ### Optional Dependencies
@@ -216,6 +215,9 @@ pip install -e .
216
215
 
217
216
  # Setup Development Dependencies
218
217
  pip install -e ".[dev]"
218
+
219
+ # NOTE -> For AMD users only
220
+ pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.3/
219
221
  ```
220
222
 
221
223
 
@@ -312,6 +314,7 @@ loss.backward()
312
314
  | Granite 3.0 & 3.1 | `liger_kernel.transformers.apply_liger_kernel_to_granite` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss |
313
315
  | OLMo2 | `liger_kernel.transformers.apply_liger_kernel_to_olmo2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
314
316
  | GLM-4 | `liger_kernel.transformers.apply_liger_kernel_to_glm4` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
317
+ | InternVL3 | `liger_kernel.transformers.apply_liger_kernel_to_internvl` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
315
318
 
316
319
 
317
320
  ## Low-level APIs
@@ -391,17 +394,17 @@ loss.backward()
391
394
  <td style="padding: 10px;">
392
395
  <div style="display: block;">
393
396
  <a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml">
394
- <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml/badge.svg?event=schedule" alt="Build">
397
+ <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml/badge.svg?branch=main&event=push" alt="Build">
395
398
  </a>
396
399
  </div>
397
400
  <div style="display: block;">
398
401
  <a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
399
- <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?event=schedule" alt="Build">
402
+ <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?branch=main&event=push" alt="Build">
400
403
  </a>
401
404
  </div>
402
405
  <div style="display: block;">
403
- <a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
404
- <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?event=schedule" alt="Build">
406
+ <a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml">
407
+ <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?branch=main&event=push" alt="Build">
405
408
  </a>
406
409
  </div>
407
410
  </td>
@@ -5,22 +5,22 @@ liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EB
5
5
  liger_kernel/chunked_loss/__init__.py,sha256=J5_jNnzZ4gZmA38W5f_4oab7xMoNk1Xy-yh3X_Xlf-s,714
6
6
  liger_kernel/chunked_loss/cosine_similarity_loss.py,sha256=pZ07OQ6RI-c8uk96tDRlUXdt31-da7yWhfwircZlKRw,4198
7
7
  liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
8
- liger_kernel/chunked_loss/dpo_loss.py,sha256=tapMiNdI8_ufW55iG0Ud4dmiW39gu1DzlvtoOCHrdGg,6259
8
+ liger_kernel/chunked_loss/dpo_loss.py,sha256=I83khNs3QQjuhr8U3NIOAACkbse6DNiBV-TulPZ0lXw,9006
9
9
  liger_kernel/chunked_loss/functional.py,sha256=-XPDbLml9dHmvoSU2VNTUrBDFehuzvuAGPikVetBMtI,1132
10
10
  liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=ooR-qnZCyWJN935oHCSWLaKKKyaYERyhNczRGi1VOiw,11935
11
- liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=AA19cpv6D8mo5RbSK5GRCcZoOSnpxV_Z1eJlAsC5eic,13434
11
+ liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=ZjpNP5VC-tXXIKb4AckkQ3iWWQeej-JoG4StJq3N0wg,13650
12
12
  liger_kernel/chunked_loss/fused_linear_preference.py,sha256=FIH85uUXAOgYx5Ax8MjFhJHVu-2pKtY7wSegd0zSyyY,18336
13
13
  liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=RiuK3UtRwH9T6jZ36sA8Urj-TVuOLOO2syLg_JOQapY,13437
14
- liger_kernel/chunked_loss/grpo_loss.py,sha256=kuqHkYV383sUxqJN-DMsfADHi2hxHVyKx5S24TNc8bQ,10866
15
- liger_kernel/chunked_loss/jsd_loss.py,sha256=uInjy-KtKNJs46Wk0AlMO9e3UYo33KJhoCl8KL8ypGU,7081
14
+ liger_kernel/chunked_loss/grpo_loss.py,sha256=SkZuKoW8K94UbWR-OtfopsQkuQ8tFOr_90AGR6_Mhes,12844
15
+ liger_kernel/chunked_loss/jsd_loss.py,sha256=gRhnmB8xwuz7FcMJi5v5eyBsq01owaCbcyyrF4rYtY0,7133
16
16
  liger_kernel/chunked_loss/kto_loss.py,sha256=llVCe6DkcpCo57seGWoMikaQVFApx764jsmSbQyqwQY,7529
17
17
  liger_kernel/chunked_loss/orpo_loss.py,sha256=nu9UYG16dcMw93lvHi4_hYs3Q0FK1KnlmMRj7OpYU8s,4872
18
18
  liger_kernel/chunked_loss/simpo_loss.py,sha256=fy2w8KbhMrBv7b1jdIeH3bBFxY52bPQPZb3KwBvmurM,5385
19
19
  liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- liger_kernel/ops/cross_entropy.py,sha256=e8THGnhOcy_0SbOLABx67HEM7-B8a8pG7nDKbCRpQKM,19123
20
+ liger_kernel/ops/cross_entropy.py,sha256=CEgAeX97ezIBRhK3dPQRKsEQiwgnBDOewtDoqKXzw_Q,19605
21
21
  liger_kernel/ops/dyt.py,sha256=gCLz4S8aul8SY9nvIGaoK67aGb7U9MJRQdo3ONqmQYs,5417
22
22
  liger_kernel/ops/fused_add_rms_norm.py,sha256=UBqmlqFCmhSAIpkNKd8rrfXatX7Z4J9bp2dX9A0lrJQ,14017
23
- liger_kernel/ops/fused_linear_cross_entropy.py,sha256=5fbGhN85n3zf0uIdJ7PYHWIRzTf0VTFiS0ARtOmqIP0,11020
23
+ liger_kernel/ops/fused_linear_cross_entropy.py,sha256=rL6PyM4_9CLj7OL6qHa_ssFdJn0JEZlE12znF7T5cvM,14521
24
24
  liger_kernel/ops/fused_linear_jsd.py,sha256=CSoprxb-YcJy-YUKiTcYkxN8sb9h2kdk_iHuncvSV5c,9683
25
25
  liger_kernel/ops/fused_neighborhood_attention.py,sha256=vPi5xbnh6wxyZehaqo6Tuilqo2fN5SGDiONjnNmIKqs,35556
26
26
  liger_kernel/ops/geglu.py,sha256=r0WSq9E93zzynL44Wh8femzOWK07_SseBM_pJUyxT3s,4144
@@ -28,8 +28,10 @@ liger_kernel/ops/group_norm.py,sha256=qD4D4lSjSgVtO52EBNLC2iTseALRgPgqXE50U2wogg
28
28
  liger_kernel/ops/grpo_loss.py,sha256=anRnv7k1-AV3pCC6_TqP0GMg78YYUfRAJrbpx6PVhl0,9448
29
29
  liger_kernel/ops/jsd.py,sha256=onHp5T3MbvJaVz5Vup7Ww6EQp_HTaZeayTjJk6FgQMY,7042
30
30
  liger_kernel/ops/kl_div.py,sha256=ZjGdDLKWksHT9dZ0xF_TDgAkj5cuMTwwT5tr9E-_24o,8734
31
- liger_kernel/ops/layer_norm.py,sha256=BHPDuaogMTfIJkBJdqLZbOQouNWTf3fJVyOQOD7blCE,9901
31
+ liger_kernel/ops/layer_norm.py,sha256=WmiORsIyufOhazmYZTPjeSc5Z-xTAYwXAKqUcCv_dlY,9807
32
+ liger_kernel/ops/llama4_rope.py,sha256=-aqdZzllklTN8b9--e-TsWY_ntGCN8-tyseT4x0bd8s,8223
32
33
  liger_kernel/ops/multi_token_attention.py,sha256=Oz_RXDp-OSS_R_HuGmaETHdAJ7Toda_70OfE7TXMUlY,7645
34
+ liger_kernel/ops/poly_norm.py,sha256=MLgI8Ea93fugKibHCUauQ2ASYVXCvpPZe5v3kQZU6po,11152
33
35
  liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
34
36
  liger_kernel/ops/rms_norm.py,sha256=DtvsWN5YktFAoc0JYSAwVeoZfryBFJlX-ipU7ooP01A,18891
35
37
  liger_kernel/ops/rope.py,sha256=v-7JHRrv-5ImoROkpKfl30WwWI4qTa2tAl7zQeB4ml4,8956
@@ -40,14 +42,14 @@ liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
40
42
  liger_kernel/ops/utils.py,sha256=uoFKQqo-34N2TWQNvXMFywqGiOMMXNEVBxVojzlUAa0,3836
41
43
  liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
42
44
  liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
43
- liger_kernel/transformers/__init__.py,sha256=VoHQp5emsAJAouql37RuvtGFeZCoMIHgoIxfsyYMTc8,7564
45
+ liger_kernel/transformers/__init__.py,sha256=MAAd-YqPdG-j_sbrIE43nrICpA4xTg-dx6M06KWLMFU,9486
44
46
  liger_kernel/transformers/auto_model.py,sha256=0qCTRZt280Bj_LcFdzo9hlaR-BWNazawXOGgoCZjgEg,1545
45
47
  liger_kernel/transformers/cross_entropy.py,sha256=z3KTWQnFxr_IZaVjtYt0ZNEWQdDdYThN35xWkHlDGH0,1683
46
48
  liger_kernel/transformers/dyt.py,sha256=i-4GPaMrl-jab9TVI5qN0-H9qycn_mCbV82ozU4nbmU,723
47
49
  liger_kernel/transformers/fsdp.py,sha256=CUiyjTmjkjY7pLXQv8ly9rnzgXw6529csd9pvtJNMYc,3096
48
- liger_kernel/transformers/functional.py,sha256=PXnACWD7kzgge50RdOUuvtmOTS7DVkkrL7mm0cX5bOc,7734
50
+ liger_kernel/transformers/functional.py,sha256=a8EGYjHDg34rhnaD4JpU8I20XJ7xiqJvqqjoh4NcwYk,8022
49
51
  liger_kernel/transformers/fused_add_rms_norm.py,sha256=7_Bzg-x6lLe6W1qG2DtjDALhEpNZlC6N5GppEs9cTYY,1199
50
- liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=O8Sg5BT81nTaY9fSGoOY9dOD9ekibwwiuXhdUHaxntQ,1742
52
+ liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=toa54dpmJduoZLhU3lJA-HPZ03MYcMKekDWPcdYjvYA,2020
51
53
  liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
52
54
  liger_kernel/transformers/fused_neighborhood_attention.py,sha256=TxYDUAt9B6WSP14aJP66C_2Mbds2sSIPGnamhUSTrC8,7957
53
55
  liger_kernel/transformers/geglu.py,sha256=mrgqzIUVd6lN7fkDKLkw5YaESDxDtFgbot430WwPVOQ,1107
@@ -56,45 +58,54 @@ liger_kernel/transformers/grpo_loss.py,sha256=uAkUNKSnUGEOqa82L9w2e6AI1kcmG8K45-
56
58
  liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
57
59
  liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
58
60
  liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
59
- liger_kernel/transformers/monkey_patch.py,sha256=tXKo4EKVp3szpdqPh051oLZFrlg_hCbWRv0RpSX_kfY,89238
60
- liger_kernel/transformers/multi_token_attention.py,sha256=l9VDICK0dfmifUDW668hGscP8AHq2rYcM2oGUa3baRQ,1751
61
+ liger_kernel/transformers/llama4_rope.py,sha256=kS6PSHEwf3dS7hD7C7p8S0geugx2EMCiP0h0F7LsUoY,3639
62
+ liger_kernel/transformers/monkey_patch.py,sha256=NWinrSt9_h4aF2Uax8jZ3of_z1LGmJY_yW9fW6EDieU,115774
63
+ liger_kernel/transformers/multi_token_attention.py,sha256=K3NIY9_5TPgZ4_Rahn0xnkMXxD_fmlJHK4CWGYvGQp0,1752
64
+ liger_kernel/transformers/poly_norm.py,sha256=g5tC75i3qy1_N26ZUP-jfpct7ivQAEdJfIfx8IXzeyE,1377
61
65
  liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
62
- liger_kernel/transformers/rms_norm.py,sha256=vkekcvTeWY8vL4H6hg3t0XeY0Ew_3OFMPHuzqlxPPVw,2719
66
+ liger_kernel/transformers/rms_norm.py,sha256=HwddVqrqS58jE-M2_4NkFGARtCDBhGnkKyjBN9b3FYI,3004
63
67
  liger_kernel/transformers/rope.py,sha256=ZTrTORSAyfcFIKjk6XEeYmk4ROH7xXED9L4g2NFntlE,999
64
68
  liger_kernel/transformers/softmax.py,sha256=yadlAgE4V2JByMwrDDa2s5SUBp8Jgd57xwnVvAWoBaI,264
65
69
  liger_kernel/transformers/sparsemax.py,sha256=0lQA0UEOs4mu8CMruZ3VLhImxQVXJWhPsAKUsYA7vj8,403
66
70
  liger_kernel/transformers/swiglu.py,sha256=LZ8YeLIdv2k46JleZMjzubGk98smt6t780kSgcVLsQk,3454
67
71
  liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
68
72
  liger_kernel/transformers/tvd.py,sha256=XrRfyJIqN6HFxXk8MYyFVZM1OLz3mtSbRZvWfZ_JerQ,450
73
+ liger_kernel/transformers/experimental/__init__.py,sha256=oQqk-f32JYgWEP9DJCj6ty6bbJSGrdXsFDQFwGeX6vI,127
69
74
  liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1wcRgDSyjBMv5i1a7BrDPDQw,881
70
75
  liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
- liger_kernel/transformers/model/gemma.py,sha256=mNX-mIwV6jI4zfbrUHp0C468pOmjzsL7mjXipGt-eS0,10007
72
- liger_kernel/transformers/model/gemma2.py,sha256=R_JFPyWTk7RyA7D05ZiIaNO5pX8gWcvfWf-6rdCRMxs,11296
73
- liger_kernel/transformers/model/gemma3.py,sha256=FKO4j3t4W_5uECRA1lhVnXC-It2GhirHm4tpCf9ApAc,12785
74
- liger_kernel/transformers/model/glm4.py,sha256=GlnEhdGJuDIqp2R9qC54biY3HwV1tWmfpJm6ijoAsrM,5257
75
- liger_kernel/transformers/model/llama.py,sha256=i8jJgyZsMKWQ-zKloETLugtwFpUOdaWxLDceciFXKd4,12832
76
- liger_kernel/transformers/model/llama4.py,sha256=IgbB8sTh3dlETQnaNNy1bZLuXy-Nt7qmeAjF27ydGpg,4210
77
- liger_kernel/transformers/model/llava.py,sha256=bLCioday_SOm69ogMDBhy_4UsVkH2-BSl93-EXY6-7I,15076
78
- liger_kernel/transformers/model/loss_utils.py,sha256=WWAMdiONPaXpIvxyOim_0igLrYh0yyOok5Q9_L9xvZw,1787
79
- liger_kernel/transformers/model/mistral.py,sha256=syYNL8dLThX2-4uC13Lu0krEZ5zw3InviDUR3AJmc-I,5500
80
- liger_kernel/transformers/model/mixtral.py,sha256=VY-y73IyjcCyWyI7ahxXLw0fJrhgjYfr1xwRYtsHX0o,11396
81
- liger_kernel/transformers/model/mllama.py,sha256=my29NXk-p6ckQaP8qDIN8e318yI_9mQZHt38MV3SqLY,11280
82
- liger_kernel/transformers/model/olmo2.py,sha256=6L_bo-ZUgO1lYppdJneOtYxNIylQKS6BiGp13g7Uq9E,5259
83
- liger_kernel/transformers/model/paligemma.py,sha256=xuIx3oOwTgftU3jqLfWOxUxgCLBNJh0yNC21an9qDjo,18773
84
- liger_kernel/transformers/model/phi3.py,sha256=zAzBVNOA16B16yy2HWsEgOMHhLoYkpWOWPgBT4z95WI,10655
85
- liger_kernel/transformers/model/qwen2.py,sha256=3fpOTEOkniQmkCfN1KUa3KhseHJVzhj2Ht9FdYPUy-E,9962
86
- liger_kernel/transformers/model/qwen2_5_vl.py,sha256=zEVVwotCXnAm3RRc8-1Nc8uitSWrwW4B9dYY2uOZDwg,6331
87
- liger_kernel/transformers/model/qwen2_vl.py,sha256=5vK-vtCDpKZ2w33xYp2BS8kQYWUbKMqaiKvQcI27Mss,5884
88
- liger_kernel/transformers/model/qwen3.py,sha256=w2jBHuK9kK9EmOr5dnEIXNQXUgUSV_sJUkXSEwxLPHs,4885
89
- liger_kernel/transformers/model/qwen3_moe.py,sha256=BkpfFH3fOH0yRfA7LF-AoHTLut2GV0Y4MOlkiIYewfU,5511
90
- liger_kernel/transformers/model/smollm3.py,sha256=mqayvpwpMbp2yd_Ue7IPzy-dA4KHSDi_ROZW5vHCHfQ,7596
76
+ liger_kernel/transformers/model/falcon_h1.py,sha256=DTzfT-5OzQ6I-pU80Vn5e5ibd1EOEbJV5cMTJFhfwFg,4302
77
+ liger_kernel/transformers/model/gemma.py,sha256=WryzpVmCm2H_XgLKNu3jJ6gVawjQDjapTetg4WHlbR4,10078
78
+ liger_kernel/transformers/model/gemma2.py,sha256=eOQEfJBKezJNNrirhkPSagGxr9qj_y4lENOZgjUZKpE,11471
79
+ liger_kernel/transformers/model/gemma3.py,sha256=-tvZw88S-STqmvdim-xrZZRJ17KLWoge_73ilIvhpIU,14157
80
+ liger_kernel/transformers/model/glm4.py,sha256=2TBM5-4URpj6uX96G1AZ_DrjAmQtgLwXGzBvaXtfwdk,5328
81
+ liger_kernel/transformers/model/glm4v.py,sha256=nlgEMOBjFEOu7a-cwwp9mWhTFqIs3QrOvcxW-uaPq-s,6022
82
+ liger_kernel/transformers/model/glm4v_moe.py,sha256=q3-R_FoQPayS85AriJWWebblXB6Ix9fvxhSrI3mHiz4,6237
83
+ liger_kernel/transformers/model/internvl.py,sha256=Uv8KGXOz9NhiKVZDeRNzAJH5kRuMZikUbswWM9u5KM0,6069
84
+ liger_kernel/transformers/model/llama.py,sha256=L_VuaxxFJpzEmpLnaqwBbI5-Q14Qgfj-ufhLydCWgdk,12903
85
+ liger_kernel/transformers/model/llama4.py,sha256=epEO_VD1gJCDovabSIQLxxncoh-TQTBfj-UgIlR5c7U,4281
86
+ liger_kernel/transformers/model/llava.py,sha256=t6kMiyBkteVam-ltiod2f1mevj8l8ZHxYDvfu9C_lEk,15196
87
+ liger_kernel/transformers/model/loss_utils.py,sha256=02RVkPI7Qs4ZP4yU_udCAvD_2hgIaHmxremRKe3N7EE,1885
88
+ liger_kernel/transformers/model/mistral.py,sha256=XmM4N21RIOkJ9PJ4PZ3DcRUhGUczn_lbx0plf1zeHb0,5571
89
+ liger_kernel/transformers/model/mixtral.py,sha256=SLdLO81AZL7zror0LXLkn2PHqKzjwMMs4kALNqoaT00,11571
90
+ liger_kernel/transformers/model/mllama.py,sha256=5q8q2BxQR_8hNZ83XrJIbndw-l6T7ZyFLM7OCv_uPK0,11593
91
+ liger_kernel/transformers/model/olmo2.py,sha256=9O1Cze2B6ON-i1jgjQwjpS_WsDEK0PzL003s-MkevWA,5330
92
+ liger_kernel/transformers/model/paligemma.py,sha256=mnTnSmEDla_bbVmPFmqhNVT__Cuf-TM-KLGFUa1sU-4,19967
93
+ liger_kernel/transformers/model/phi3.py,sha256=L4gG8htOABmaxzcmHph0bBFCACRvL9r6wuDVFXi2o7Q,4117
94
+ liger_kernel/transformers/model/qwen2.py,sha256=lgn0X6EzAZUhOv17ZDD9choIDdaPVIAsIrrdvwzWXqs,10033
95
+ liger_kernel/transformers/model/qwen2_5_vl.py,sha256=Ea3zvL1FJfjlaerpeXCq-1zmorrajwNsR-XsgWr4fFQ,6465
96
+ liger_kernel/transformers/model/qwen2_vl.py,sha256=ZeasFPGs-bxm2Y_E15mo0YNx5wwtKYDV-bjVKjkLPBk,6018
97
+ liger_kernel/transformers/model/qwen3.py,sha256=Q2aOg5erPrgVgRcqJm8sefLSDtvU1AD5B7aJnP7mRMM,4956
98
+ liger_kernel/transformers/model/qwen3_moe.py,sha256=1CwTMCNFDYsjGoa_aHFBagtC5HuJTV-s0__5UvcjD3A,5686
99
+ liger_kernel/transformers/model/qwen3_next.py,sha256=7To7azriAogxeE7oEvByKztH9154dnDiDVNHHm7PZK4,5632
100
+ liger_kernel/transformers/model/smollm3.py,sha256=0KWVkDtXbjsBKhJnaquV6vUUYyLtfmNwYH0sxJt-qTk,7667
101
+ liger_kernel/transformers/model/smolvlm.py,sha256=yFpPKawLVo3zXzLjM7Y_T8FyRrPxVyp-YPFMM8m3k0c,6734
91
102
  liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
92
103
  liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
93
104
  liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
94
105
  liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
95
- liger_kernel-0.6.1.dist-info/licenses/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
96
- liger_kernel-0.6.1.dist-info/licenses/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
97
- liger_kernel-0.6.1.dist-info/METADATA,sha256=_of0e7dKufrp2upc26bnv4VLBZvAbcdDA8Fssm3mIfk,24545
98
- liger_kernel-0.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
99
- liger_kernel-0.6.1.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
100
- liger_kernel-0.6.1.dist-info/RECORD,,
106
+ liger_kernel-0.6.3.dist-info/licenses/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
107
+ liger_kernel-0.6.3.dist-info/licenses/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
108
+ liger_kernel-0.6.3.dist-info/METADATA,sha256=n9tHig7KRoszPUoLj3yvGp89iubHb2wDwXAsrg7XPFo,24820
109
+ liger_kernel-0.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
110
+ liger_kernel-0.6.3.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
111
+ liger_kernel-0.6.3.dist-info/RECORD,,