liger-kernel-nightly 0.6.2.dev20250913213534__py3-none-any.whl → 0.6.2.dev20250919191028__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of liger-kernel-nightly might be problematic. Click here for more details.
- liger_kernel/transformers/fused_linear_cross_entropy.py +1 -1
- liger_kernel/transformers/model/glm4v.py +1 -1
- liger_kernel/transformers/model/glm4v_moe.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534.dist-info → liger_kernel_nightly-0.6.2.dev20250919191028.dist-info}/METADATA +2 -5
- {liger_kernel_nightly-0.6.2.dev20250913213534.dist-info → liger_kernel_nightly-0.6.2.dev20250919191028.dist-info}/RECORD +9 -9
- {liger_kernel_nightly-0.6.2.dev20250913213534.dist-info → liger_kernel_nightly-0.6.2.dev20250919191028.dist-info}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534.dist-info → liger_kernel_nightly-0.6.2.dev20250919191028.dist-info}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534.dist-info → liger_kernel_nightly-0.6.2.dev20250919191028.dist-info}/WHEEL +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534.dist-info → liger_kernel_nightly-0.6.2.dev20250919191028.dist-info}/top_level.txt +0 -0
|
@@ -25,7 +25,7 @@ class LigerFusedLinearCrossEntropyLoss(torch.nn.Module):
|
|
|
25
25
|
assert reduction in {
|
|
26
26
|
"mean",
|
|
27
27
|
"sum",
|
|
28
|
-
"none",
|
|
28
|
+
"none",
|
|
29
29
|
}, f"reduction must be 'mean' or 'sum' or 'none'. Got: {reduction}"
|
|
30
30
|
assert softcap is None or softcap > 0, f"softcap must greater than 0.0 or None. Got: {softcap}"
|
|
31
31
|
self.ce_weight = ce_weight
|
|
@@ -70,7 +70,7 @@ def lce_forward(
|
|
|
70
70
|
>>> processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
|
|
71
71
|
>>> model = Glm4vForConditionalGeneration.from_pretrained(
|
|
72
72
|
pretrained_model_name_or_path=MODEL_PATH,
|
|
73
|
-
|
|
73
|
+
dtype=torch.bfloat16,
|
|
74
74
|
device_map="auto",
|
|
75
75
|
)
|
|
76
76
|
>>> inputs = processor.apply_chat_template(
|
|
@@ -75,7 +75,7 @@ def lce_forward(
|
|
|
75
75
|
>>> processor = AutoProcessor.from_pretrained(MODEL_PATH)
|
|
76
76
|
>>> model = Glm4vMoeForConditionalGeneration.from_pretrained(
|
|
77
77
|
pretrained_model_name_or_path=MODEL_PATH,
|
|
78
|
-
|
|
78
|
+
dtype="auto",
|
|
79
79
|
device_map="auto",
|
|
80
80
|
)
|
|
81
81
|
>>> inputs = processor.apply_chat_template(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: liger_kernel_nightly
|
|
3
|
-
Version: 0.6.2.dev20250913213534
|
|
3
|
+
Version: 0.6.2.dev20250919191028
|
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
|
@@ -35,9 +35,7 @@ Requires-Dist: triton>=2.3.1
|
|
|
35
35
|
Provides-Extra: dev
|
|
36
36
|
Requires-Dist: transformers>=4.49.0; extra == "dev"
|
|
37
37
|
Requires-Dist: matplotlib>=3.7.2; extra == "dev"
|
|
38
|
-
Requires-Dist:
|
|
39
|
-
Requires-Dist: black>=24.4.2; extra == "dev"
|
|
40
|
-
Requires-Dist: isort>=5.13.2; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.12.0; extra == "dev"
|
|
41
39
|
Requires-Dist: pytest>=7.1.2; extra == "dev"
|
|
42
40
|
Requires-Dist: pytest-xdist; extra == "dev"
|
|
43
41
|
Requires-Dist: pytest-cov; extra == "dev"
|
|
@@ -45,7 +43,6 @@ Requires-Dist: pytest-asyncio; extra == "dev"
|
|
|
45
43
|
Requires-Dist: pytest-rerunfailures; extra == "dev"
|
|
46
44
|
Requires-Dist: datasets>=2.19.2; extra == "dev"
|
|
47
45
|
Requires-Dist: seaborn; extra == "dev"
|
|
48
|
-
Requires-Dist: mkdocs; extra == "dev"
|
|
49
46
|
Requires-Dist: mkdocs-material; extra == "dev"
|
|
50
47
|
Requires-Dist: torchvision>=0.20; extra == "dev"
|
|
51
48
|
|
|
@@ -48,7 +48,7 @@ liger_kernel/transformers/dyt.py,sha256=i-4GPaMrl-jab9TVI5qN0-H9qycn_mCbV82ozU4n
|
|
|
48
48
|
liger_kernel/transformers/fsdp.py,sha256=CUiyjTmjkjY7pLXQv8ly9rnzgXw6529csd9pvtJNMYc,3096
|
|
49
49
|
liger_kernel/transformers/functional.py,sha256=-vpz95wbv5wLpInjSG06KNHETsEgKnRIiV-lMYHVs68,7841
|
|
50
50
|
liger_kernel/transformers/fused_add_rms_norm.py,sha256=7_Bzg-x6lLe6W1qG2DtjDALhEpNZlC6N5GppEs9cTYY,1199
|
|
51
|
-
liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=
|
|
51
|
+
liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=toa54dpmJduoZLhU3lJA-HPZ03MYcMKekDWPcdYjvYA,2020
|
|
52
52
|
liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
|
|
53
53
|
liger_kernel/transformers/fused_neighborhood_attention.py,sha256=TxYDUAt9B6WSP14aJP66C_2Mbds2sSIPGnamhUSTrC8,7957
|
|
54
54
|
liger_kernel/transformers/geglu.py,sha256=mrgqzIUVd6lN7fkDKLkw5YaESDxDtFgbot430WwPVOQ,1107
|
|
@@ -75,8 +75,8 @@ liger_kernel/transformers/model/gemma.py,sha256=mNX-mIwV6jI4zfbrUHp0C468pOmjzsL7
|
|
|
75
75
|
liger_kernel/transformers/model/gemma2.py,sha256=R_JFPyWTk7RyA7D05ZiIaNO5pX8gWcvfWf-6rdCRMxs,11296
|
|
76
76
|
liger_kernel/transformers/model/gemma3.py,sha256=FKO4j3t4W_5uECRA1lhVnXC-It2GhirHm4tpCf9ApAc,12785
|
|
77
77
|
liger_kernel/transformers/model/glm4.py,sha256=GlnEhdGJuDIqp2R9qC54biY3HwV1tWmfpJm6ijoAsrM,5257
|
|
78
|
-
liger_kernel/transformers/model/glm4v.py,sha256=
|
|
79
|
-
liger_kernel/transformers/model/glm4v_moe.py,sha256=
|
|
78
|
+
liger_kernel/transformers/model/glm4v.py,sha256=dE9rRx1bOIr4T9xSXj58dtukaR80szeuclbgX0A0ovg,5951
|
|
79
|
+
liger_kernel/transformers/model/glm4v_moe.py,sha256=Nij7tIYGkcBhJUMvQhYoFd1tVT4pbkxc2k4MSvN2pak,6166
|
|
80
80
|
liger_kernel/transformers/model/llama.py,sha256=i8jJgyZsMKWQ-zKloETLugtwFpUOdaWxLDceciFXKd4,12832
|
|
81
81
|
liger_kernel/transformers/model/llama4.py,sha256=IgbB8sTh3dlETQnaNNy1bZLuXy-Nt7qmeAjF27ydGpg,4210
|
|
82
82
|
liger_kernel/transformers/model/llava.py,sha256=bLCioday_SOm69ogMDBhy_4UsVkH2-BSl93-EXY6-7I,15076
|
|
@@ -97,9 +97,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
|
|
|
97
97
|
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
|
|
98
98
|
liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
|
|
99
99
|
liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
|
|
100
|
-
liger_kernel_nightly-0.6.2.dev20250913213534.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
101
|
-
liger_kernel_nightly-0.6.2.
|
|
102
|
-
liger_kernel_nightly-0.6.2.dev20250913213534.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
103
|
-
liger_kernel_nightly-0.6.2.dev20250913213534.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
104
|
-
liger_kernel_nightly-0.6.2.dev20250913213534.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
105
|
-
liger_kernel_nightly-0.6.2.dev20250913213534.dist-info/RECORD,,
|
|
100
|
+
liger_kernel_nightly-0.6.2.dev20250919191028.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
101
|
+
liger_kernel_nightly-0.6.2.dev20250919191028.dist-info/METADATA,sha256=x12rwCMObA88WbKjXB3WRmeFjGhmAEAIYQAZKJVg8xk,24461
|
|
102
|
+
liger_kernel_nightly-0.6.2.dev20250919191028.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
103
|
+
liger_kernel_nightly-0.6.2.dev20250919191028.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
104
|
+
liger_kernel_nightly-0.6.2.dev20250919191028.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
105
|
+
liger_kernel_nightly-0.6.2.dev20250919191028.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|