liger-kernel 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- liger_kernel/ops/cross_entropy.py +5 -39
- liger_kernel/ops/experimental/mm_int8int2.py +355 -0
- liger_kernel/ops/fused_linear_cross_entropy.py +12 -9
- liger_kernel/ops/fused_linear_jsd.py +245 -0
- liger_kernel/ops/geglu.py +2 -2
- liger_kernel/ops/jsd.py +176 -0
- liger_kernel/ops/kl_div.py +2 -2
- liger_kernel/ops/rms_norm.py +67 -42
- liger_kernel/ops/swiglu.py +2 -2
- liger_kernel/ops/utils.py +62 -1
- liger_kernel/transformers/__init__.py +3 -0
- liger_kernel/transformers/functional.py +4 -0
- liger_kernel/transformers/fused_linear_jsd.py +98 -0
- liger_kernel/transformers/jsd.py +75 -0
- liger_kernel/transformers/model/gemma.py +124 -1
- liger_kernel/transformers/model/llama.py +135 -4
- liger_kernel/transformers/model/mistral.py +3 -0
- liger_kernel/transformers/model/mixtral.py +153 -2
- liger_kernel/transformers/model/mllama.py +274 -0
- liger_kernel/transformers/model/phi3.py +140 -2
- liger_kernel/transformers/model/qwen2.py +123 -2
- liger_kernel/transformers/model/qwen2_vl.py +8 -1
- liger_kernel/transformers/monkey_patch.py +158 -7
- {liger_kernel-0.3.1.dist-info → liger_kernel-0.4.0.dist-info}/METADATA +60 -28
- liger_kernel-0.4.0.dist-info/NOTICE +58 -0
- liger_kernel-0.4.0.dist-info/RECORD +48 -0
- {liger_kernel-0.3.1.dist-info → liger_kernel-0.4.0.dist-info}/WHEEL +1 -1
- liger_kernel-0.3.1.dist-info/NOTICE +0 -4
- liger_kernel-0.3.1.dist-info/RECORD +0 -42
- {liger_kernel-0.3.1.dist-info → liger_kernel-0.4.0.dist-info}/LICENSE +0 -0
- {liger_kernel-0.3.1.dist-info → liger_kernel-0.4.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
Copyright 2024 LinkedIn Corporation
|
|
2
|
+
All Rights Reserved.
|
|
3
|
+
|
|
4
|
+
Licensed under the BSD 2-Clause License (the "License"). See License in the project root for license information.
|
|
5
|
+
|
|
6
|
+
This product includes software developed by LinkedIn Corporation.
|
|
7
|
+
|
|
8
|
+
This product contains code derived from the following open source projects:
|
|
9
|
+
|
|
10
|
+
1. Unsloth
|
|
11
|
+
Copyright (c) 2023 Unsloth AI
|
|
12
|
+
Licensed under the Apache License, Version 2.0
|
|
13
|
+
Source: https://github.com/unslothai/unsloth
|
|
14
|
+
|
|
15
|
+
The `calculate_settings` function, which determines the block size and number of warps, is reused for the Norm and MLP operations.
|
|
16
|
+
Modifications and additions were made to the RMS Norm implementation.
|
|
17
|
+
|
|
18
|
+
2. Triton
|
|
19
|
+
Copyright (c) 2023 OpenAI
|
|
20
|
+
Licensed under the MIT License
|
|
21
|
+
Source: https://github.com/openai/triton
|
|
22
|
+
|
|
23
|
+
Modifications were made based on Triton tutorials for the RMS Norm implementation.
|
|
24
|
+
|
|
25
|
+
3. Efficient Cross Entropy
|
|
26
|
+
Copyright (c) 2023 Mohamed Malek
|
|
27
|
+
Licensed under the MIT License
|
|
28
|
+
Source: https://github.com/mgmalek/efficient_cross_entropy
|
|
29
|
+
|
|
30
|
+
The idea of gradient-in-forward and chunking was used in the Linear Cross Entropy implementation.
|
|
31
|
+
|
|
32
|
+
4. Flash Attention
|
|
33
|
+
Copyright (c) 2023 Tri Dao, Daniel Y. Fu, Stefano Ermon, Atri Rudra, Christopher Ré
|
|
34
|
+
Licensed under the BSD 3-Clause License
|
|
35
|
+
Source: https://github.com/Dao-AILab/flash-attention
|
|
36
|
+
|
|
37
|
+
Optimization ideas such as tiling and recomputation were inspired by this work.
|
|
38
|
+
|
|
39
|
+
5. AutoAWQ
|
|
40
|
+
Copyright (c) 2023 Casper Hansen
|
|
41
|
+
Licensed under the MIT License
|
|
42
|
+
Source: https://github.com/casper-hansen/AutoAWQ
|
|
43
|
+
|
|
44
|
+
The design of the automodel was referenced from this project.
|
|
45
|
+
|
|
46
|
+
6. llm.c
|
|
47
|
+
Copyright (c) 2023 Andrej Karpathy
|
|
48
|
+
Licensed under the MIT License
|
|
49
|
+
Source: https://github.com/karpathy/llm.c
|
|
50
|
+
|
|
51
|
+
The design of end-to-end testing was referenced from this project.
|
|
52
|
+
|
|
53
|
+
7. Tiny Shakespeare Dataset
|
|
54
|
+
Source: https://huggingface.co/datasets/karpathy/tiny_shakespeare
|
|
55
|
+
|
|
56
|
+
This dataset is used to conduct convergence tests on mini models.
|
|
57
|
+
|
|
58
|
+
For full license texts, please refer to the respective project repositories.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
liger_kernel/env_report.py,sha256=LFUJ6UMkFFGPBYXBlqHFGy4bhsemEpSI-_1edSazlHI,1130
|
|
2
|
+
liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
liger_kernel/ops/cross_entropy.py,sha256=23Di7l0T20OBj8K3-0PYEA5FCJrrbiKs3xMGyLlzbtg,11248
|
|
4
|
+
liger_kernel/ops/fused_linear_cross_entropy.py,sha256=M-cF4BO-vvso2BIdk7-Q2FleeFPhqSQwZR1EirPC4OE,9456
|
|
5
|
+
liger_kernel/ops/fused_linear_jsd.py,sha256=5D_obamh08lGGTMyh85kBJD_aNjPhOYf4-TmCZ6m4s4,9626
|
|
6
|
+
liger_kernel/ops/geglu.py,sha256=MQL4zyzneZqZYUGPvb1QjI_EYT9_pKfSDgR25WD9jrI,4127
|
|
7
|
+
liger_kernel/ops/jsd.py,sha256=anWfdioucxZy4JQfTvbHBR-IQrZKeH-gBF1MHwwTuTQ,5781
|
|
8
|
+
liger_kernel/ops/kl_div.py,sha256=03FNXfvCb6M-56hhFepAFV9p6brArPR6KOKkdGD34mw,8374
|
|
9
|
+
liger_kernel/ops/layer_norm.py,sha256=unGMYMOPqtkM9aTrokhcqgPmsV2AUN7Yzv86isVB9OI,7422
|
|
10
|
+
liger_kernel/ops/rms_norm.py,sha256=9S9wyZLmzNyJlBxV4vbv4p5es7bGP-m_5wK9JC6JIdA,10911
|
|
11
|
+
liger_kernel/ops/rope.py,sha256=jrzaA9-6Orn44y_IIam9_YNPQxOFK2FrIRNfFea4EtU,8513
|
|
12
|
+
liger_kernel/ops/swiglu.py,sha256=Fwxtd76rhHKT9ShQAGca9RsnASplAVxtYKHmiT73_yA,2994
|
|
13
|
+
liger_kernel/ops/utils.py,sha256=3JSF--O7KT5Wa5BuO70M4h0XetxoZ_e9IoW9GRlxlBg,3777
|
|
14
|
+
liger_kernel/ops/experimental/embedding.py,sha256=LYR66dB-jhvhtUjeV4PnNro-n77J1mdlmpSLSxB3Y6U,4186
|
|
15
|
+
liger_kernel/ops/experimental/mm_int8int2.py,sha256=JpGVZCgRC6T8XMUJ_QbZRS2XU1bh0urIZphs5DTc1mY,13358
|
|
16
|
+
liger_kernel/transformers/__init__.py,sha256=gia-eBxr7TLxU0GdDf8AfCY4WgDlFLqIGSt7EoQGsBA,1336
|
|
17
|
+
liger_kernel/transformers/auto_model.py,sha256=RMIwQHSiXoksXFTIqFZ4PLBgoqkxJJAT3q1Qh47bGN8,1552
|
|
18
|
+
liger_kernel/transformers/cross_entropy.py,sha256=gL30VByCSA_iQSkhV6no70x_IUqqFSTMJdytppico_w,804
|
|
19
|
+
liger_kernel/transformers/functional.py,sha256=zlQ1yKOIZe-ZGmFicRMrlFJfAt8zzBWu8L4GVQbp_e8,1124
|
|
20
|
+
liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=-07t8YRajZTrJOG2rUzt6Ur7kNuWgarWcqy7ou5Da8k,629
|
|
21
|
+
liger_kernel/transformers/fused_linear_jsd.py,sha256=MJ-KjmLZnakuoVpnbDGkd95DQgvESniyrRWYzollVZM,4066
|
|
22
|
+
liger_kernel/transformers/geglu.py,sha256=QcrME_8ooIn0xa59LaC0aoOdRrBIFd11Y0bAyF0NfCw,1130
|
|
23
|
+
liger_kernel/transformers/jsd.py,sha256=W-5CypO2mx4-bUWOxq1KScfCdoXlLoYbtt5xBnRzMs4,3056
|
|
24
|
+
liger_kernel/transformers/kl_div.py,sha256=qVhjBg6tjRyue5iZ3NFxo8uySY4JuIFJyv0IM_50F24,431
|
|
25
|
+
liger_kernel/transformers/layer_norm.py,sha256=fd6o4kSHJWolQMWxh-l1qObfgL08ruNbUoBiANKX1ow,972
|
|
26
|
+
liger_kernel/transformers/monkey_patch.py,sha256=qetRIZmdHIDxE0TtWP5-rWS91NuGgRYRZBTqzJUojkI,35507
|
|
27
|
+
liger_kernel/transformers/rms_norm.py,sha256=4XfMQI6dORF7s_5qUqVHKWv-3IUomaimU2dg-NwnpoM,1035
|
|
28
|
+
liger_kernel/transformers/rope.py,sha256=m-ah8vZBYW8tfplTXCiAPMHJWlB1tdp_JPXJeWE-Boo,943
|
|
29
|
+
liger_kernel/transformers/swiglu.py,sha256=0-tVJ8xEYfhxnduc16PflXFj8sZPxdx9sHUn3hfwCI4,2468
|
|
30
|
+
liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
|
|
31
|
+
liger_kernel/transformers/experimental/embedding.py,sha256=HpckiAMKM8-SRxKDcGTqortVxnjhwpZsfsp9lfjqfeM,895
|
|
32
|
+
liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
|
+
liger_kernel/transformers/model/gemma.py,sha256=R4huxuR48gkLrdT8KqV7As2v9dZtEmcGVz6YG1ZmuJE,9692
|
|
34
|
+
liger_kernel/transformers/model/llama.py,sha256=RinsgC_eR-YNvZd2SHPQxZ4eyR3uViaTFCM3SvI5nks,10426
|
|
35
|
+
liger_kernel/transformers/model/mistral.py,sha256=XpL1rlWg_llvW3z_Hf_d8WQs7uQaH4ds7EZ2SxjQHsU,5144
|
|
36
|
+
liger_kernel/transformers/model/mixtral.py,sha256=nyDS1dBpsOXYC2DuW59Hgu7ZrGftrHuWPfNqjcNPIxs,11503
|
|
37
|
+
liger_kernel/transformers/model/mllama.py,sha256=mesNCgj0Ea1O-fqRD4LVxDJ1CR2abY_zAzK_bfVzkiU,11222
|
|
38
|
+
liger_kernel/transformers/model/phi3.py,sha256=xUZPlaPKwknLjHc3uUW3EPodm1h0vD3G7Qnhh51v-Io,10332
|
|
39
|
+
liger_kernel/transformers/model/qwen2.py,sha256=EyhSSzQOskGjSnCsKMZpd1s5IAIlHd5PBO3q0MoCs00,9619
|
|
40
|
+
liger_kernel/transformers/model/qwen2_vl.py,sha256=j6xAhp9AG195dsZK5f8dFYVM9uKtWApZrggT5Y08jn4,7055
|
|
41
|
+
liger_kernel/triton/__init__.py,sha256=yfRe0zMb47QnqjecZWG7LnanfCTzeku7SgWRAwNVmzU,101
|
|
42
|
+
liger_kernel/triton/monkey_patch.py,sha256=5BcGKTtdqeYchypBIBopGIWPx1-cFALz7sOKoEsqXJ0,1584
|
|
43
|
+
liger_kernel-0.4.0.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
44
|
+
liger_kernel-0.4.0.dist-info/METADATA,sha256=DfE4CFCD-OnW5VdfxakEA_dXsYxJemAHNtfc5x8TVOc,27694
|
|
45
|
+
liger_kernel-0.4.0.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
46
|
+
liger_kernel-0.4.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
|
47
|
+
liger_kernel-0.4.0.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
48
|
+
liger_kernel-0.4.0.dist-info/RECORD,,
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
liger_kernel/env_report.py,sha256=LFUJ6UMkFFGPBYXBlqHFGy4bhsemEpSI-_1edSazlHI,1130
|
|
2
|
-
liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
-
liger_kernel/ops/cross_entropy.py,sha256=6uoPScKpXJ7gdBlOpSnZcQ5fQe52JHYjUVsr_Bf4kCE,12317
|
|
4
|
-
liger_kernel/ops/fused_linear_cross_entropy.py,sha256=XLKDHBMbqD6nH2mfFLmA1UoU-N7CpKWHp4L3itWoHCs,9321
|
|
5
|
-
liger_kernel/ops/geglu.py,sha256=ErnNAgoMDCd8pqTh18Resl5JHCaRpRruH2jZ9_Y9CvA,4131
|
|
6
|
-
liger_kernel/ops/kl_div.py,sha256=qnmtFQwuO3FR7Ovup_DDzpkD1A1LpwOaWlcO6K9ysHk,8342
|
|
7
|
-
liger_kernel/ops/layer_norm.py,sha256=unGMYMOPqtkM9aTrokhcqgPmsV2AUN7Yzv86isVB9OI,7422
|
|
8
|
-
liger_kernel/ops/rms_norm.py,sha256=4miEoDSdsc0GuhI3BpBRxt6iieFQcN2QnNp4o8PVB98,9921
|
|
9
|
-
liger_kernel/ops/rope.py,sha256=jrzaA9-6Orn44y_IIam9_YNPQxOFK2FrIRNfFea4EtU,8513
|
|
10
|
-
liger_kernel/ops/swiglu.py,sha256=qxNpfYUB9abS-v8yiuzQn9oYHA2P_l4wT19m8GkCa_c,2998
|
|
11
|
-
liger_kernel/ops/utils.py,sha256=Y5sbRuZVoswsMzITTTiFgITJN2QO0K4McAAUncE3UnE,1941
|
|
12
|
-
liger_kernel/ops/experimental/embedding.py,sha256=LYR66dB-jhvhtUjeV4PnNro-n77J1mdlmpSLSxB3Y6U,4186
|
|
13
|
-
liger_kernel/transformers/__init__.py,sha256=UP5NP8yJhkFkjLVTkFRU0w0CA49hwdhqwmIgaBAEcj0,1148
|
|
14
|
-
liger_kernel/transformers/auto_model.py,sha256=RMIwQHSiXoksXFTIqFZ4PLBgoqkxJJAT3q1Qh47bGN8,1552
|
|
15
|
-
liger_kernel/transformers/cross_entropy.py,sha256=gL30VByCSA_iQSkhV6no70x_IUqqFSTMJdytppico_w,804
|
|
16
|
-
liger_kernel/transformers/functional.py,sha256=gXviuzvWjkSLfNGUWLKDnp4s6ATpvz7309kov6JKp0Y,906
|
|
17
|
-
liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=-07t8YRajZTrJOG2rUzt6Ur7kNuWgarWcqy7ou5Da8k,629
|
|
18
|
-
liger_kernel/transformers/geglu.py,sha256=QcrME_8ooIn0xa59LaC0aoOdRrBIFd11Y0bAyF0NfCw,1130
|
|
19
|
-
liger_kernel/transformers/kl_div.py,sha256=qVhjBg6tjRyue5iZ3NFxo8uySY4JuIFJyv0IM_50F24,431
|
|
20
|
-
liger_kernel/transformers/layer_norm.py,sha256=fd6o4kSHJWolQMWxh-l1qObfgL08ruNbUoBiANKX1ow,972
|
|
21
|
-
liger_kernel/transformers/monkey_patch.py,sha256=HtyeNNVJTOVN_UrI8piaG7_0An9-fgUXfIZfOlxx_os,28474
|
|
22
|
-
liger_kernel/transformers/rms_norm.py,sha256=4XfMQI6dORF7s_5qUqVHKWv-3IUomaimU2dg-NwnpoM,1035
|
|
23
|
-
liger_kernel/transformers/rope.py,sha256=m-ah8vZBYW8tfplTXCiAPMHJWlB1tdp_JPXJeWE-Boo,943
|
|
24
|
-
liger_kernel/transformers/swiglu.py,sha256=0-tVJ8xEYfhxnduc16PflXFj8sZPxdx9sHUn3hfwCI4,2468
|
|
25
|
-
liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
|
|
26
|
-
liger_kernel/transformers/experimental/embedding.py,sha256=HpckiAMKM8-SRxKDcGTqortVxnjhwpZsfsp9lfjqfeM,895
|
|
27
|
-
liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
-
liger_kernel/transformers/model/gemma.py,sha256=EcdkGbSj_qroTDFl0Sc_HLyDyY0xcDhwrgkM_wkXnw8,4987
|
|
29
|
-
liger_kernel/transformers/model/llama.py,sha256=6McXLi_Bt35WuxaJ_0CzEnOtayHXiPw5vjiDsaQKdJU,5323
|
|
30
|
-
liger_kernel/transformers/model/mistral.py,sha256=_MQJrDntlxBO5cJwgTjr2rk2nNd5FAXVnzcTg_PEekQ,5079
|
|
31
|
-
liger_kernel/transformers/model/mixtral.py,sha256=ZwVz7zSD2S2fyyMuJgDE4grvt2VvQL-jsZeJtdwnHFk,5750
|
|
32
|
-
liger_kernel/transformers/model/phi3.py,sha256=zmjOsVV5TjKJ0U2dCm6W-8WCx1toKoh2Wm2PZu3XOIw,4927
|
|
33
|
-
liger_kernel/transformers/model/qwen2.py,sha256=Va4uiZaVzCG2V7XKDfHjZyYTre5vPQM02j83jnnhono,4873
|
|
34
|
-
liger_kernel/transformers/model/qwen2_vl.py,sha256=UajJdi49tUOfa68i2WHQ_2GZBF7d_N_uwOntER3bsl8,6607
|
|
35
|
-
liger_kernel/triton/__init__.py,sha256=yfRe0zMb47QnqjecZWG7LnanfCTzeku7SgWRAwNVmzU,101
|
|
36
|
-
liger_kernel/triton/monkey_patch.py,sha256=5BcGKTtdqeYchypBIBopGIWPx1-cFALz7sOKoEsqXJ0,1584
|
|
37
|
-
liger_kernel-0.3.1.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
38
|
-
liger_kernel-0.3.1.dist-info/METADATA,sha256=fHMAk1Nur5qcuMidT0iXL5an0DIs9aG4HDFcqzD4Gms,25763
|
|
39
|
-
liger_kernel-0.3.1.dist-info/NOTICE,sha256=BXkXY9aWvEy_7MAB57zDu1z8uMYT1i1l9B6EpHuBa8s,173
|
|
40
|
-
liger_kernel-0.3.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
41
|
-
liger_kernel-0.3.1.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
42
|
-
liger_kernel-0.3.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|