liger-kernel 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. liger_kernel/ops/cross_entropy.py +5 -39
  2. liger_kernel/ops/experimental/mm_int8int2.py +355 -0
  3. liger_kernel/ops/fused_linear_cross_entropy.py +12 -9
  4. liger_kernel/ops/fused_linear_jsd.py +245 -0
  5. liger_kernel/ops/geglu.py +2 -2
  6. liger_kernel/ops/jsd.py +176 -0
  7. liger_kernel/ops/kl_div.py +2 -2
  8. liger_kernel/ops/rms_norm.py +67 -42
  9. liger_kernel/ops/swiglu.py +2 -2
  10. liger_kernel/ops/utils.py +62 -1
  11. liger_kernel/transformers/__init__.py +3 -0
  12. liger_kernel/transformers/functional.py +4 -0
  13. liger_kernel/transformers/fused_linear_jsd.py +98 -0
  14. liger_kernel/transformers/jsd.py +75 -0
  15. liger_kernel/transformers/model/gemma.py +124 -1
  16. liger_kernel/transformers/model/llama.py +135 -4
  17. liger_kernel/transformers/model/mistral.py +3 -0
  18. liger_kernel/transformers/model/mixtral.py +153 -2
  19. liger_kernel/transformers/model/mllama.py +274 -0
  20. liger_kernel/transformers/model/phi3.py +140 -2
  21. liger_kernel/transformers/model/qwen2.py +123 -2
  22. liger_kernel/transformers/model/qwen2_vl.py +8 -1
  23. liger_kernel/transformers/monkey_patch.py +158 -7
  24. {liger_kernel-0.3.1.dist-info → liger_kernel-0.4.0.dist-info}/METADATA +60 -28
  25. liger_kernel-0.4.0.dist-info/NOTICE +58 -0
  26. liger_kernel-0.4.0.dist-info/RECORD +48 -0
  27. {liger_kernel-0.3.1.dist-info → liger_kernel-0.4.0.dist-info}/WHEEL +1 -1
  28. liger_kernel-0.3.1.dist-info/NOTICE +0 -4
  29. liger_kernel-0.3.1.dist-info/RECORD +0 -42
  30. {liger_kernel-0.3.1.dist-info → liger_kernel-0.4.0.dist-info}/LICENSE +0 -0
  31. {liger_kernel-0.3.1.dist-info → liger_kernel-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,58 @@
1
+ Copyright 2024 LinkedIn Corporation
2
+ All Rights Reserved.
3
+
4
+ Licensed under the BSD 2-Clause License (the "License"). See License in the project root for license information.
5
+
6
+ This product includes software developed by LinkedIn Corporation.
7
+
8
+ This product contains code derived from the following open source projects:
9
+
10
+ 1. Unsloth
11
+ Copyright (c) 2023 Unsloth AI
12
+ Licensed under the Apache License, Version 2.0
13
+ Source: https://github.com/unslothai/unsloth
14
+
15
+ The `calculate_settings` function to determine block size and warp is reused for Norm and MLP operations.
16
+ Modifications and additions were made to the RMS Norm implementation.
17
+
18
+ 2. Triton
19
+ Copyright (c) 2023 OpenAI
20
+ Licensed under the MIT License
21
+ Source: https://github.com/openai/triton
22
+
23
+ Modifications were made based on Triton tutorials for the RMS Norm implementation.
24
+
25
+ 3. Efficient Cross Entropy
26
+ Copyright (c) 2023 Mohamed Malek
27
+ Licensed under the MIT License
28
+ Source: https://github.com/mgmalek/efficient_cross_entropy
29
+
30
+ The idea of gradient-in-forward and chunking was used in the Linear Cross Entropy implementation.
31
+
32
+ 4. Flash Attention
33
+ Copyright (c) 2023 Tri Dao, Daniel Y. Fu, Stefano Ermon, Atri Rudra, Christopher Ré
34
+ Licensed under the BSD 3-Clause License
35
+ Source: https://github.com/Dao-AILab/flash-attention
36
+
37
+ Optimization ideas such as tiling and recomputation were inspired by this work.
38
+
39
+ 5. AutoAWQ
40
+ Copyright (c) 2023 Casper Hansen
41
+ Licensed under the MIT License
42
+ Source: https://github.com/casper-hansen/AutoAWQ
43
+
44
+ The design of the automodel was referenced from this project.
45
+
46
+ 6. llm.c
47
+ Copyright (c) 2023 Andrej Karpathy
48
+ Licensed under the MIT License
49
+ Source: https://github.com/karpathy/llm.c
50
+
51
+ The design of end-to-end testing was referenced from this project.
52
+
53
+ 7. Tiny Shakespeare Dataset
54
+ Source: https://huggingface.co/datasets/karpathy/tiny_shakespeare
55
+
56
+ This dataset is used to conduct convergence tests on mini models.
57
+
58
+ For full license texts, please refer to the respective project repositories.
@@ -0,0 +1,48 @@
1
+ liger_kernel/env_report.py,sha256=LFUJ6UMkFFGPBYXBlqHFGy4bhsemEpSI-_1edSazlHI,1130
2
+ liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ liger_kernel/ops/cross_entropy.py,sha256=23Di7l0T20OBj8K3-0PYEA5FCJrrbiKs3xMGyLlzbtg,11248
4
+ liger_kernel/ops/fused_linear_cross_entropy.py,sha256=M-cF4BO-vvso2BIdk7-Q2FleeFPhqSQwZR1EirPC4OE,9456
5
+ liger_kernel/ops/fused_linear_jsd.py,sha256=5D_obamh08lGGTMyh85kBJD_aNjPhOYf4-TmCZ6m4s4,9626
6
+ liger_kernel/ops/geglu.py,sha256=MQL4zyzneZqZYUGPvb1QjI_EYT9_pKfSDgR25WD9jrI,4127
7
+ liger_kernel/ops/jsd.py,sha256=anWfdioucxZy4JQfTvbHBR-IQrZKeH-gBF1MHwwTuTQ,5781
8
+ liger_kernel/ops/kl_div.py,sha256=03FNXfvCb6M-56hhFepAFV9p6brArPR6KOKkdGD34mw,8374
9
+ liger_kernel/ops/layer_norm.py,sha256=unGMYMOPqtkM9aTrokhcqgPmsV2AUN7Yzv86isVB9OI,7422
10
+ liger_kernel/ops/rms_norm.py,sha256=9S9wyZLmzNyJlBxV4vbv4p5es7bGP-m_5wK9JC6JIdA,10911
11
+ liger_kernel/ops/rope.py,sha256=jrzaA9-6Orn44y_IIam9_YNPQxOFK2FrIRNfFea4EtU,8513
12
+ liger_kernel/ops/swiglu.py,sha256=Fwxtd76rhHKT9ShQAGca9RsnASplAVxtYKHmiT73_yA,2994
13
+ liger_kernel/ops/utils.py,sha256=3JSF--O7KT5Wa5BuO70M4h0XetxoZ_e9IoW9GRlxlBg,3777
14
+ liger_kernel/ops/experimental/embedding.py,sha256=LYR66dB-jhvhtUjeV4PnNro-n77J1mdlmpSLSxB3Y6U,4186
15
+ liger_kernel/ops/experimental/mm_int8int2.py,sha256=JpGVZCgRC6T8XMUJ_QbZRS2XU1bh0urIZphs5DTc1mY,13358
16
+ liger_kernel/transformers/__init__.py,sha256=gia-eBxr7TLxU0GdDf8AfCY4WgDlFLqIGSt7EoQGsBA,1336
17
+ liger_kernel/transformers/auto_model.py,sha256=RMIwQHSiXoksXFTIqFZ4PLBgoqkxJJAT3q1Qh47bGN8,1552
18
+ liger_kernel/transformers/cross_entropy.py,sha256=gL30VByCSA_iQSkhV6no70x_IUqqFSTMJdytppico_w,804
19
+ liger_kernel/transformers/functional.py,sha256=zlQ1yKOIZe-ZGmFicRMrlFJfAt8zzBWu8L4GVQbp_e8,1124
20
+ liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=-07t8YRajZTrJOG2rUzt6Ur7kNuWgarWcqy7ou5Da8k,629
21
+ liger_kernel/transformers/fused_linear_jsd.py,sha256=MJ-KjmLZnakuoVpnbDGkd95DQgvESniyrRWYzollVZM,4066
22
+ liger_kernel/transformers/geglu.py,sha256=QcrME_8ooIn0xa59LaC0aoOdRrBIFd11Y0bAyF0NfCw,1130
23
+ liger_kernel/transformers/jsd.py,sha256=W-5CypO2mx4-bUWOxq1KScfCdoXlLoYbtt5xBnRzMs4,3056
24
+ liger_kernel/transformers/kl_div.py,sha256=qVhjBg6tjRyue5iZ3NFxo8uySY4JuIFJyv0IM_50F24,431
25
+ liger_kernel/transformers/layer_norm.py,sha256=fd6o4kSHJWolQMWxh-l1qObfgL08ruNbUoBiANKX1ow,972
26
+ liger_kernel/transformers/monkey_patch.py,sha256=qetRIZmdHIDxE0TtWP5-rWS91NuGgRYRZBTqzJUojkI,35507
27
+ liger_kernel/transformers/rms_norm.py,sha256=4XfMQI6dORF7s_5qUqVHKWv-3IUomaimU2dg-NwnpoM,1035
28
+ liger_kernel/transformers/rope.py,sha256=m-ah8vZBYW8tfplTXCiAPMHJWlB1tdp_JPXJeWE-Boo,943
29
+ liger_kernel/transformers/swiglu.py,sha256=0-tVJ8xEYfhxnduc16PflXFj8sZPxdx9sHUn3hfwCI4,2468
30
+ liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
31
+ liger_kernel/transformers/experimental/embedding.py,sha256=HpckiAMKM8-SRxKDcGTqortVxnjhwpZsfsp9lfjqfeM,895
32
+ liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
+ liger_kernel/transformers/model/gemma.py,sha256=R4huxuR48gkLrdT8KqV7As2v9dZtEmcGVz6YG1ZmuJE,9692
34
+ liger_kernel/transformers/model/llama.py,sha256=RinsgC_eR-YNvZd2SHPQxZ4eyR3uViaTFCM3SvI5nks,10426
35
+ liger_kernel/transformers/model/mistral.py,sha256=XpL1rlWg_llvW3z_Hf_d8WQs7uQaH4ds7EZ2SxjQHsU,5144
36
+ liger_kernel/transformers/model/mixtral.py,sha256=nyDS1dBpsOXYC2DuW59Hgu7ZrGftrHuWPfNqjcNPIxs,11503
37
+ liger_kernel/transformers/model/mllama.py,sha256=mesNCgj0Ea1O-fqRD4LVxDJ1CR2abY_zAzK_bfVzkiU,11222
38
+ liger_kernel/transformers/model/phi3.py,sha256=xUZPlaPKwknLjHc3uUW3EPodm1h0vD3G7Qnhh51v-Io,10332
39
+ liger_kernel/transformers/model/qwen2.py,sha256=EyhSSzQOskGjSnCsKMZpd1s5IAIlHd5PBO3q0MoCs00,9619
40
+ liger_kernel/transformers/model/qwen2_vl.py,sha256=j6xAhp9AG195dsZK5f8dFYVM9uKtWApZrggT5Y08jn4,7055
41
+ liger_kernel/triton/__init__.py,sha256=yfRe0zMb47QnqjecZWG7LnanfCTzeku7SgWRAwNVmzU,101
42
+ liger_kernel/triton/monkey_patch.py,sha256=5BcGKTtdqeYchypBIBopGIWPx1-cFALz7sOKoEsqXJ0,1584
43
+ liger_kernel-0.4.0.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
44
+ liger_kernel-0.4.0.dist-info/METADATA,sha256=DfE4CFCD-OnW5VdfxakEA_dXsYxJemAHNtfc5x8TVOc,27694
45
+ liger_kernel-0.4.0.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
46
+ liger_kernel-0.4.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
47
+ liger_kernel-0.4.0.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
48
+ liger_kernel-0.4.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,4 +0,0 @@
1
- Copyright 2024 LinkedIn Corporation
2
- All Rights Reserved.
3
-
4
- Licensed under the BSD 2-Clause License (the "License"). See License in the project root for license information.
@@ -1,42 +0,0 @@
1
- liger_kernel/env_report.py,sha256=LFUJ6UMkFFGPBYXBlqHFGy4bhsemEpSI-_1edSazlHI,1130
2
- liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- liger_kernel/ops/cross_entropy.py,sha256=6uoPScKpXJ7gdBlOpSnZcQ5fQe52JHYjUVsr_Bf4kCE,12317
4
- liger_kernel/ops/fused_linear_cross_entropy.py,sha256=XLKDHBMbqD6nH2mfFLmA1UoU-N7CpKWHp4L3itWoHCs,9321
5
- liger_kernel/ops/geglu.py,sha256=ErnNAgoMDCd8pqTh18Resl5JHCaRpRruH2jZ9_Y9CvA,4131
6
- liger_kernel/ops/kl_div.py,sha256=qnmtFQwuO3FR7Ovup_DDzpkD1A1LpwOaWlcO6K9ysHk,8342
7
- liger_kernel/ops/layer_norm.py,sha256=unGMYMOPqtkM9aTrokhcqgPmsV2AUN7Yzv86isVB9OI,7422
8
- liger_kernel/ops/rms_norm.py,sha256=4miEoDSdsc0GuhI3BpBRxt6iieFQcN2QnNp4o8PVB98,9921
9
- liger_kernel/ops/rope.py,sha256=jrzaA9-6Orn44y_IIam9_YNPQxOFK2FrIRNfFea4EtU,8513
10
- liger_kernel/ops/swiglu.py,sha256=qxNpfYUB9abS-v8yiuzQn9oYHA2P_l4wT19m8GkCa_c,2998
11
- liger_kernel/ops/utils.py,sha256=Y5sbRuZVoswsMzITTTiFgITJN2QO0K4McAAUncE3UnE,1941
12
- liger_kernel/ops/experimental/embedding.py,sha256=LYR66dB-jhvhtUjeV4PnNro-n77J1mdlmpSLSxB3Y6U,4186
13
- liger_kernel/transformers/__init__.py,sha256=UP5NP8yJhkFkjLVTkFRU0w0CA49hwdhqwmIgaBAEcj0,1148
14
- liger_kernel/transformers/auto_model.py,sha256=RMIwQHSiXoksXFTIqFZ4PLBgoqkxJJAT3q1Qh47bGN8,1552
15
- liger_kernel/transformers/cross_entropy.py,sha256=gL30VByCSA_iQSkhV6no70x_IUqqFSTMJdytppico_w,804
16
- liger_kernel/transformers/functional.py,sha256=gXviuzvWjkSLfNGUWLKDnp4s6ATpvz7309kov6JKp0Y,906
17
- liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=-07t8YRajZTrJOG2rUzt6Ur7kNuWgarWcqy7ou5Da8k,629
18
- liger_kernel/transformers/geglu.py,sha256=QcrME_8ooIn0xa59LaC0aoOdRrBIFd11Y0bAyF0NfCw,1130
19
- liger_kernel/transformers/kl_div.py,sha256=qVhjBg6tjRyue5iZ3NFxo8uySY4JuIFJyv0IM_50F24,431
20
- liger_kernel/transformers/layer_norm.py,sha256=fd6o4kSHJWolQMWxh-l1qObfgL08ruNbUoBiANKX1ow,972
21
- liger_kernel/transformers/monkey_patch.py,sha256=HtyeNNVJTOVN_UrI8piaG7_0An9-fgUXfIZfOlxx_os,28474
22
- liger_kernel/transformers/rms_norm.py,sha256=4XfMQI6dORF7s_5qUqVHKWv-3IUomaimU2dg-NwnpoM,1035
23
- liger_kernel/transformers/rope.py,sha256=m-ah8vZBYW8tfplTXCiAPMHJWlB1tdp_JPXJeWE-Boo,943
24
- liger_kernel/transformers/swiglu.py,sha256=0-tVJ8xEYfhxnduc16PflXFj8sZPxdx9sHUn3hfwCI4,2468
25
- liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
26
- liger_kernel/transformers/experimental/embedding.py,sha256=HpckiAMKM8-SRxKDcGTqortVxnjhwpZsfsp9lfjqfeM,895
27
- liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
- liger_kernel/transformers/model/gemma.py,sha256=EcdkGbSj_qroTDFl0Sc_HLyDyY0xcDhwrgkM_wkXnw8,4987
29
- liger_kernel/transformers/model/llama.py,sha256=6McXLi_Bt35WuxaJ_0CzEnOtayHXiPw5vjiDsaQKdJU,5323
30
- liger_kernel/transformers/model/mistral.py,sha256=_MQJrDntlxBO5cJwgTjr2rk2nNd5FAXVnzcTg_PEekQ,5079
31
- liger_kernel/transformers/model/mixtral.py,sha256=ZwVz7zSD2S2fyyMuJgDE4grvt2VvQL-jsZeJtdwnHFk,5750
32
- liger_kernel/transformers/model/phi3.py,sha256=zmjOsVV5TjKJ0U2dCm6W-8WCx1toKoh2Wm2PZu3XOIw,4927
33
- liger_kernel/transformers/model/qwen2.py,sha256=Va4uiZaVzCG2V7XKDfHjZyYTre5vPQM02j83jnnhono,4873
34
- liger_kernel/transformers/model/qwen2_vl.py,sha256=UajJdi49tUOfa68i2WHQ_2GZBF7d_N_uwOntER3bsl8,6607
35
- liger_kernel/triton/__init__.py,sha256=yfRe0zMb47QnqjecZWG7LnanfCTzeku7SgWRAwNVmzU,101
36
- liger_kernel/triton/monkey_patch.py,sha256=5BcGKTtdqeYchypBIBopGIWPx1-cFALz7sOKoEsqXJ0,1584
37
- liger_kernel-0.3.1.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
38
- liger_kernel-0.3.1.dist-info/METADATA,sha256=fHMAk1Nur5qcuMidT0iXL5an0DIs9aG4HDFcqzD4Gms,25763
39
- liger_kernel-0.3.1.dist-info/NOTICE,sha256=BXkXY9aWvEy_7MAB57zDu1z8uMYT1i1l9B6EpHuBa8s,173
40
- liger_kernel-0.3.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
41
- liger_kernel-0.3.1.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
42
- liger_kernel-0.3.1.dist-info/RECORD,,