liger-kernel 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- liger_kernel/chunked_loss/README.md +25 -0
- liger_kernel/chunked_loss/__init__.py +3 -0
- liger_kernel/chunked_loss/cpo_loss.py +18 -8
- liger_kernel/chunked_loss/dpo_loss.py +20 -10
- liger_kernel/chunked_loss/functional.py +4 -0
- liger_kernel/chunked_loss/fused_linear_distillation.py +58 -44
- liger_kernel/chunked_loss/fused_linear_preference.py +108 -60
- liger_kernel/chunked_loss/fused_linear_rlhf.py +213 -0
- liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +246 -0
- liger_kernel/chunked_loss/grpo_loss.py +160 -0
- liger_kernel/chunked_loss/jsd_loss.py +154 -0
- liger_kernel/chunked_loss/kto_loss.py +172 -0
- liger_kernel/chunked_loss/orpo_loss.py +8 -9
- liger_kernel/chunked_loss/simpo_loss.py +22 -8
- liger_kernel/env_report.py +5 -12
- liger_kernel/ops/cross_entropy.py +102 -51
- liger_kernel/ops/experimental/embedding.py +1 -3
- liger_kernel/ops/experimental/mm_int8int2.py +3 -9
- liger_kernel/ops/fused_linear_cross_entropy.py +89 -55
- liger_kernel/ops/fused_linear_jsd.py +14 -32
- liger_kernel/ops/geglu.py +6 -17
- liger_kernel/ops/group_norm.py +11 -28
- liger_kernel/ops/jsd.py +5 -9
- liger_kernel/ops/kl_div.py +8 -11
- liger_kernel/ops/layer_norm.py +23 -12
- liger_kernel/ops/qwen2vl_mrope.py +8 -25
- liger_kernel/ops/rms_norm.py +14 -32
- liger_kernel/ops/rope.py +31 -33
- liger_kernel/ops/swiglu.py +4 -8
- liger_kernel/ops/tvd.py +207 -0
- liger_kernel/ops/utils.py +3 -2
- liger_kernel/transformers/__init__.py +19 -24
- liger_kernel/transformers/auto_model.py +6 -13
- liger_kernel/transformers/cross_entropy.py +7 -9
- liger_kernel/transformers/experimental/embedding.py +1 -3
- liger_kernel/transformers/functional.py +28 -7
- liger_kernel/transformers/fused_linear_cross_entropy.py +15 -10
- liger_kernel/transformers/geglu.py +1 -4
- liger_kernel/transformers/group_norm.py +9 -15
- liger_kernel/transformers/jsd.py +1 -3
- liger_kernel/transformers/kl_div.py +1 -3
- liger_kernel/transformers/layer_norm.py +3 -9
- liger_kernel/transformers/model/gemma.py +18 -40
- liger_kernel/transformers/model/gemma2.py +19 -41
- liger_kernel/transformers/model/llama.py +22 -48
- liger_kernel/transformers/model/mistral.py +14 -26
- liger_kernel/transformers/model/mixtral.py +24 -54
- liger_kernel/transformers/model/mllama.py +16 -36
- liger_kernel/transformers/model/olmo2.py +124 -0
- liger_kernel/transformers/model/phi3.py +18 -40
- liger_kernel/transformers/model/qwen2.py +18 -40
- liger_kernel/transformers/model/qwen2_vl.py +36 -32
- liger_kernel/transformers/monkey_patch.py +214 -144
- liger_kernel/transformers/rms_norm.py +4 -4
- liger_kernel/transformers/rope.py +2 -2
- liger_kernel/transformers/swiglu.py +2 -8
- liger_kernel/transformers/trainer/__init__.py +1 -3
- liger_kernel/transformers/trainer/orpo_trainer.py +31 -18
- liger_kernel/transformers/tvd.py +13 -0
- liger_kernel/triton/__init__.py +1 -3
- liger_kernel/triton/monkey_patch.py +1 -3
- liger_kernel/utils.py +49 -0
- {liger_kernel-0.5.2.dist-info → liger_kernel-0.5.4.dist-info}/METADATA +53 -26
- liger_kernel-0.5.4.dist-info/RECORD +74 -0
- {liger_kernel-0.5.2.dist-info → liger_kernel-0.5.4.dist-info}/WHEEL +1 -1
- liger_kernel-0.5.2.dist-info/RECORD +0 -65
- {liger_kernel-0.5.2.dist-info → liger_kernel-0.5.4.dist-info}/LICENSE +0 -0
- {liger_kernel-0.5.2.dist-info → liger_kernel-0.5.4.dist-info}/NOTICE +0 -0
- {liger_kernel-0.5.2.dist-info → liger_kernel-0.5.4.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,14 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
|
+
from typing import Callable
|
|
3
|
+
from typing import Dict
|
|
4
|
+
from typing import List
|
|
5
|
+
from typing import Literal
|
|
6
|
+
from typing import Tuple
|
|
7
|
+
from typing import Union
|
|
2
8
|
|
|
3
9
|
import torch
|
|
4
10
|
import torch.nn as nn
|
|
11
|
+
|
|
5
12
|
from torch.distributed.fsdp import FullyShardedDataParallel
|
|
6
13
|
from trl.trainer import ORPOTrainer
|
|
7
14
|
|
|
@@ -17,7 +24,7 @@ class _FSDPForwardRedirection:
|
|
|
17
24
|
This is needed in cases where we call a submodule of a FSDP module. For instance, when we want to call only
|
|
18
25
|
the `LlamaModel` part out of a FSDP-wrapped `LlamaForCausalLM` to get the hidden states without involving
|
|
19
26
|
GPU-memory-heavy `lm_head` and cross entropy computation, doing this directly (i.e. `model.model.forward()`)
|
|
20
|
-
will not work because the first `nn.
|
|
27
|
+
will not work because the first `nn.Embedding` layer is not independently wrapped as a FSDP module (because of
|
|
21
28
|
the transformer-based wrapping policy), and not calling it through FSDP root module forward will not all-gather
|
|
22
29
|
its parameter, thus resulting in "RuntimeError: 'weight' must be 2-D" error. Similarly, if we want to call just
|
|
23
30
|
the `lm_head` part of a model, we need this trick too to properly get its params all-gathered.
|
|
@@ -62,9 +69,7 @@ class _FSDPForwardRedirection:
|
|
|
62
69
|
class LigerORPOTrainer(ORPOTrainer):
|
|
63
70
|
def concatenated_forward(
|
|
64
71
|
self, model: nn.Module, batch: Dict[str, Union[List, torch.LongTensor]]
|
|
65
|
-
) -> Tuple[
|
|
66
|
-
torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor
|
|
67
|
-
]:
|
|
72
|
+
) -> Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]:
|
|
68
73
|
"""
|
|
69
74
|
Run the given model on the given batch of inputs, concatenating the chosen and rejected inputs together.
|
|
70
75
|
We do this to avoid doing two forward passes, because it's faster for FSDP.
|
|
@@ -79,9 +84,7 @@ class LigerORPOTrainer(ORPOTrainer):
|
|
|
79
84
|
|
|
80
85
|
model_kwargs = (
|
|
81
86
|
{
|
|
82
|
-
"decoder_input_ids": self._shift_right(
|
|
83
|
-
concatenated_batch["concatenated_labels"]
|
|
84
|
-
),
|
|
87
|
+
"decoder_input_ids": self._shift_right(concatenated_batch["concatenated_labels"]),
|
|
85
88
|
}
|
|
86
89
|
if self.is_encoder_decoder
|
|
87
90
|
else {}
|
|
@@ -90,6 +93,13 @@ class LigerORPOTrainer(ORPOTrainer):
|
|
|
90
93
|
if self.aux_loss_enabled:
|
|
91
94
|
model_kwargs["output_router_logits"] = True
|
|
92
95
|
|
|
96
|
+
if self.is_encoder_decoder:
|
|
97
|
+
labels = concatenated_batch["concatenated_labels"].clone()
|
|
98
|
+
else:
|
|
99
|
+
labels = concatenated_batch["concatenated_input_ids"].clone()
|
|
100
|
+
attention_mask = concatenated_batch["concatenated_attention_mask"]
|
|
101
|
+
labels = torch.where(attention_mask == 1, labels, self.label_pad_token_id)
|
|
102
|
+
|
|
93
103
|
if isinstance(model, FullyShardedDataParallel):
|
|
94
104
|
outputs = _FSDPForwardRedirection()(
|
|
95
105
|
model,
|
|
@@ -109,22 +119,27 @@ class LigerORPOTrainer(ORPOTrainer):
|
|
|
109
119
|
**model_kwargs,
|
|
110
120
|
)
|
|
111
121
|
|
|
112
|
-
orpo_loss_fn = LigerFusedLinearORPOLoss(
|
|
113
|
-
ignore_index=self.label_pad_token_id, beta=self.beta
|
|
114
|
-
)
|
|
122
|
+
orpo_loss_fn = LigerFusedLinearORPOLoss(ignore_index=self.label_pad_token_id, beta=self.beta)
|
|
115
123
|
|
|
116
|
-
def orpo_partial(lm_head, last_hidden_state, concatenated_labels):
|
|
124
|
+
def orpo_partial(lm_head, last_hidden_state, concatenated_labels, nll_target):
|
|
117
125
|
return orpo_loss_fn(
|
|
118
|
-
lm_head.weight, last_hidden_state, concatenated_labels, lm_head.bias
|
|
126
|
+
lm_head.weight, last_hidden_state, concatenated_labels, lm_head.bias, nll_target=nll_target
|
|
119
127
|
)
|
|
120
128
|
|
|
121
129
|
orpo_loss, aux_outputs = _FSDPForwardRedirection()(
|
|
122
130
|
model,
|
|
123
131
|
orpo_partial,
|
|
124
132
|
model.lm_head,
|
|
125
|
-
outputs.last_hidden_state,
|
|
126
|
-
concatenated_batch["concatenated_labels"]
|
|
133
|
+
outputs.last_hidden_state[:, :-1] if not self.is_encoder_decoder else outputs.last_hidden_state,
|
|
134
|
+
concatenated_batch["concatenated_labels"][:, 1:]
|
|
135
|
+
if not self.is_encoder_decoder
|
|
136
|
+
else concatenated_batch["concatenated_labels"],
|
|
137
|
+
labels[:, 1:] if not self.is_encoder_decoder else labels,
|
|
127
138
|
)
|
|
139
|
+
# if aux_loss_enabled, add the aux_loss to the orpo_loss
|
|
140
|
+
if self.aux_loss_enabled:
|
|
141
|
+
orpo_loss += self.aux_loss_coef * outputs.aux_loss
|
|
142
|
+
|
|
128
143
|
return orpo_loss, aux_outputs
|
|
129
144
|
|
|
130
145
|
def get_batch_loss_metrics(
|
|
@@ -145,9 +160,7 @@ class LigerORPOTrainer(ORPOTrainer):
|
|
|
145
160
|
) = aux_outputs[:5]
|
|
146
161
|
|
|
147
162
|
# return loss, metrics
|
|
148
|
-
chosen_rewards, rejected_rewards, log_odds_ratio, log_odds_chosen = aux_outputs[
|
|
149
|
-
5:
|
|
150
|
-
]
|
|
163
|
+
chosen_rewards, rejected_rewards, log_odds_ratio, log_odds_chosen = aux_outputs[5:]
|
|
151
164
|
|
|
152
165
|
reward_accuracies = (chosen_rewards > rejected_rewards).float()
|
|
153
166
|
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import torch.nn as nn
|
|
2
|
+
|
|
3
|
+
from liger_kernel.ops.tvd import LigerTVDLossFunction
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class LigerTVDLoss(nn.Module):
|
|
7
|
+
def __init__(self, reduction="batchmean", ignore_index: int = -100):
|
|
8
|
+
super(LigerTVDLoss, self).__init__()
|
|
9
|
+
self.reduction = reduction
|
|
10
|
+
self.ignore_index = ignore_index
|
|
11
|
+
|
|
12
|
+
def forward(self, p, q, shift_labels=None):
|
|
13
|
+
return LigerTVDLossFunction.apply(p, q, shift_labels, self.reduction, self.ignore_index)
|
liger_kernel/triton/__init__.py
CHANGED
|
@@ -37,6 +37,4 @@ def apply_liger_triton_cache_manager():
|
|
|
37
37
|
Experimental feature to get around transient FileNotFoundError in triton compilation.
|
|
38
38
|
For more details please see https://github.com/triton-lang/triton/pull/4295
|
|
39
39
|
"""
|
|
40
|
-
os.environ["TRITON_CACHE_MANAGER"] =
|
|
41
|
-
"liger_kernel.triton.monkey_patch:LigerTritonFileCacheManager"
|
|
42
|
-
)
|
|
40
|
+
os.environ["TRITON_CACHE_MANAGER"] = "liger_kernel.triton.monkey_patch:LigerTritonFileCacheManager"
|
liger_kernel/utils.py
CHANGED
|
@@ -9,5 +9,54 @@ def infer_device():
|
|
|
9
9
|
return "cuda"
|
|
10
10
|
elif torch.xpu.is_available():
|
|
11
11
|
return "xpu"
|
|
12
|
+
elif torch.hip.is_available():
|
|
13
|
+
return "hip"
|
|
12
14
|
else:
|
|
13
15
|
return "cpu"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def transformers_version_dispatch(
|
|
19
|
+
required_version: str,
|
|
20
|
+
before_fn,
|
|
21
|
+
after_fn,
|
|
22
|
+
before_args: tuple = (),
|
|
23
|
+
after_args: tuple = (),
|
|
24
|
+
before_kwargs: dict = None,
|
|
25
|
+
after_kwargs: dict = None,
|
|
26
|
+
):
|
|
27
|
+
"""
|
|
28
|
+
Dispatches to different functions based on package version comparison.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
required_version: Version to compare against (e.g. "4.48.0")
|
|
32
|
+
before_fn: Function to call if package_version < required_version
|
|
33
|
+
after_fn: Function to call if package_version >= required_version
|
|
34
|
+
before_args: Positional arguments for before_fn
|
|
35
|
+
after_args: Positional arguments for after_fn
|
|
36
|
+
before_kwargs: Keyword arguments for before_fn
|
|
37
|
+
after_kwargs: Keyword arguments for after_fn
|
|
38
|
+
|
|
39
|
+
Returns:
|
|
40
|
+
Result from either before_fn or after_fn
|
|
41
|
+
|
|
42
|
+
Example:
|
|
43
|
+
>>> rotary_emb = transformers_version_dispatch(
|
|
44
|
+
... "4.48.0",
|
|
45
|
+
... LlamaRotaryEmbedding,
|
|
46
|
+
... LlamaRotaryEmbedding,
|
|
47
|
+
... before_args=(head_dim,),
|
|
48
|
+
... after_args=(LlamaConfig(head_dim=head_dim),),
|
|
49
|
+
... before_kwargs={'device': device},
|
|
50
|
+
... after_kwargs={'device': device}
|
|
51
|
+
... )
|
|
52
|
+
"""
|
|
53
|
+
from packaging import version
|
|
54
|
+
from transformers import __version__ as transformers_version
|
|
55
|
+
|
|
56
|
+
before_kwargs = before_kwargs or {}
|
|
57
|
+
after_kwargs = after_kwargs or {}
|
|
58
|
+
|
|
59
|
+
if version.parse(transformers_version) < version.parse(required_version):
|
|
60
|
+
return before_fn(*before_args, **before_kwargs)
|
|
61
|
+
else:
|
|
62
|
+
return after_fn(*after_args, **after_kwargs)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: liger_kernel
|
|
3
|
-
Version: 0.5.2
|
|
3
|
+
Version: 0.5.4
|
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
|
@@ -32,10 +32,6 @@ License-File: LICENSE
|
|
|
32
32
|
License-File: NOTICE
|
|
33
33
|
Requires-Dist: torch>=2.1.2
|
|
34
34
|
Requires-Dist: triton>=2.3.1
|
|
35
|
-
Provides-Extra: transformers
|
|
36
|
-
Requires-Dist: transformers~=4.0; extra == "transformers"
|
|
37
|
-
Provides-Extra: trl
|
|
38
|
-
Requires-Dist: trl>=0.11.0; extra == "trl"
|
|
39
35
|
Provides-Extra: dev
|
|
40
36
|
Requires-Dist: transformers>=4.44.2; extra == "dev"
|
|
41
37
|
Requires-Dist: matplotlib>=3.7.2; extra == "dev"
|
|
@@ -46,13 +42,11 @@ Requires-Dist: pytest>=7.1.2; extra == "dev"
|
|
|
46
42
|
Requires-Dist: pytest-xdist; extra == "dev"
|
|
47
43
|
Requires-Dist: pytest-rerunfailures; extra == "dev"
|
|
48
44
|
Requires-Dist: datasets>=2.19.2; extra == "dev"
|
|
49
|
-
Requires-Dist: torchvision>=0.16.2; extra == "dev"
|
|
50
45
|
Requires-Dist: seaborn; extra == "dev"
|
|
51
|
-
|
|
52
|
-
Requires-Dist:
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
Requires-Dist: triton>=3.0.0; extra == "amd"
|
|
46
|
+
Requires-Dist: mkdocs; extra == "dev"
|
|
47
|
+
Requires-Dist: mkdocs-material; extra == "dev"
|
|
48
|
+
Dynamic: provides-extra
|
|
49
|
+
Dynamic: requires-dist
|
|
56
50
|
|
|
57
51
|
<a name="readme-top"></a>
|
|
58
52
|
|
|
@@ -103,6 +97,11 @@ Requires-Dist: triton>=3.0.0; extra == "amd"
|
|
|
103
97
|
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
104
98
|
</a>
|
|
105
99
|
</div>
|
|
100
|
+
<div style="display: block;">
|
|
101
|
+
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml">
|
|
102
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
103
|
+
</a>
|
|
104
|
+
</div>
|
|
106
105
|
</td>
|
|
107
106
|
</tr>
|
|
108
107
|
</table>
|
|
@@ -116,7 +115,8 @@ Requires-Dist: triton>=3.0.0; extra == "amd"
|
|
|
116
115
|
<details>
|
|
117
116
|
<summary>Latest News 🔥</summary>
|
|
118
117
|
|
|
119
|
-
- [2024/12/
|
|
118
|
+
- [2024/12/11] We release [v0.5.0](https://github.com/linkedin/Liger-Kernel/releases/tag/v0.5.0): 80% more memory efficient post training losses (DPO, ORPO, CPO, etc)!
|
|
119
|
+
- [2024/12/5] We release LinkedIn Engineering Blog - [Liger-Kernel: Empowering an open source ecosystem of Triton Kernels for Efficient LLM Training](https://www.linkedin.com/blog/engineering/open-source/liger-kernel-open-source-ecosystem-for-efficient-llm-training)
|
|
120
120
|
- [2024/11/6] We release [v0.4.0](https://github.com/linkedin/Liger-Kernel/releases/tag/v0.4.0): Full AMD support, Tech Report, Modal CI, Llama-3.2-Vision!
|
|
121
121
|
- [2024/10/21] We have released the tech report of Liger Kernel on Arxiv: https://arxiv.org/pdf/2410.10989
|
|
122
122
|
- [2024/9/6] We release v0.2.1 ([X post](https://x.com/liger_kernel/status/1832168197002510649)). 2500+ Stars, 10+ New Contributors, 50+ PRs, 50k Downloads in two weeks!
|
|
@@ -128,7 +128,7 @@ Requires-Dist: triton>=3.0.0; extra == "amd"
|
|
|
128
128
|
|
|
129
129
|
**Liger Kernel** is a collection of Triton kernels designed specifically for LLM training. It can effectively increase multi-GPU **training throughput by 20%** and reduce **memory usage by 60%**. We have implemented **Hugging Face Compatible** `RMSNorm`, `RoPE`, `SwiGLU`, `CrossEntropy`, `FusedLinearCrossEntropy`, and more to come. The kernel works out of the box with [Flash Attention](https://github.com/Dao-AILab/flash-attention), [PyTorch FSDP](https://pytorch.org/tutorials/intermediate/FSDP_tutorial.html), and [Microsoft DeepSpeed](https://github.com/microsoft/DeepSpeed). We welcome contributions from the community to gather the best kernels for LLM training.
|
|
130
130
|
|
|
131
|
-
We've also added optimized Post-Training kernels that deliver **up to 80% memory savings** for alignment and distillation tasks. We support losses like DPO, CPO, ORPO, SimPO, JSD, and many more.
|
|
131
|
+
We've also added optimized Post-Training kernels that deliver **up to 80% memory savings** for alignment and distillation tasks. We support losses like DPO, CPO, ORPO, SimPO, KTO, JSD, and many more. Check out [how we optimize the memory](https://x.com/hsu_byron/status/1866577403918917655).
|
|
132
132
|
|
|
133
133
|
## Supercharge Your Model with Liger Kernel
|
|
134
134
|
|
|
@@ -145,6 +145,21 @@ With one line of code, Liger Kernel can increase throughput by more than 20% and
|
|
|
145
145
|
> - Benchmark conditions: LLaMA 3-8B, Batch Size = 8, Data Type = `bf16`, Optimizer = AdamW, Gradient Checkpointing = True, Distributed Strategy = FSDP1 on 8 A100s.
|
|
146
146
|
> - Hugging Face models start to OOM at a 4K context length, whereas Hugging Face + Liger Kernel scales up to 16K.
|
|
147
147
|
|
|
148
|
+
## Optimize Post Training with Liger Kernel
|
|
149
|
+
|
|
150
|
+
<p align="center">
|
|
151
|
+
<img src="https://raw.githubusercontent.com/linkedin/Liger-Kernel/main/docs/images/post-training.png" width="50%" alt="Post Training">
|
|
152
|
+
</p>
|
|
153
|
+
|
|
154
|
+
We provide optimized post-training kernels such as DPO, ORPO, SimPO, and more, which can reduce memory usage by up to 80%. You can easily use them as Python modules.
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from liger_kernel.chunked_loss import LigerFusedLinearORPOLoss
|
|
158
|
+
orpo_loss = LigerFusedLinearORPOLoss()
|
|
159
|
+
y = orpo_loss(lm_head.weight, x, target)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
|
|
148
163
|
## Examples
|
|
149
164
|
|
|
150
165
|
| **Use Case** | **Description** |
|
|
@@ -178,6 +193,11 @@ With one line of code, Liger Kernel can increase throughput by more than 20% and
|
|
|
178
193
|
- `torch >= 2.5.0` Install according to the instructions on the official PyTorch website.
|
|
179
194
|
- `triton >= 3.0.0` Install from pypi. (e.g. `pip install triton==3.0.0`)
|
|
180
195
|
|
|
196
|
+
```bash
|
|
197
|
+
# Need to pass the url when installing
|
|
198
|
+
pip install -e .[dev] --extra-index-url https://download.pytorch.org/whl/nightly/rocm6.2
|
|
199
|
+
```
|
|
200
|
+
|
|
181
201
|
### Optional Dependencies
|
|
182
202
|
|
|
183
203
|
- `transformers >= 4.x`: Required if you plan to use the transformers models patching APIs. The specific model you are working with will dictate the minimum version of transformers.
|
|
@@ -202,11 +222,13 @@ To install from source:
|
|
|
202
222
|
```bash
|
|
203
223
|
git clone https://github.com/linkedin/Liger-Kernel.git
|
|
204
224
|
cd Liger-Kernel
|
|
225
|
+
|
|
226
|
+
# Install Default Dependencies
|
|
227
|
+
# Setup.py will detect whether you are using AMD or NVIDIA
|
|
205
228
|
pip install -e .
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
pip install -e .[transformers]
|
|
229
|
+
|
|
230
|
+
# Setup Development Dependencies
|
|
231
|
+
pip install -e ".[dev]"
|
|
210
232
|
```
|
|
211
233
|
|
|
212
234
|
|
|
@@ -252,7 +274,7 @@ model = transformers.AutoModelForCausalLM("path/to/llama/model")
|
|
|
252
274
|
|
|
253
275
|
### 3. Compose Your Own Model
|
|
254
276
|
|
|
255
|
-
You can take individual [kernels](#kernels) to compose your models.
|
|
277
|
+
You can take individual [kernels](https://github.com/linkedin/Liger-Kernel?tab=readme-ov-file#model-kernels) to compose your models.
|
|
256
278
|
|
|
257
279
|
```python
|
|
258
280
|
from liger_kernel.transformers import LigerFusedLinearCrossEntropyLoss
|
|
@@ -291,8 +313,10 @@ loss.backward()
|
|
|
291
313
|
| Gemma1 | `liger_kernel.transformers.apply_liger_kernel_to_gemma` | RoPE, RMSNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
292
314
|
| Gemma2 | `liger_kernel.transformers.apply_liger_kernel_to_gemma2` | RoPE, RMSNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
293
315
|
| Qwen2, Qwen2.5, & QwQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
294
|
-
| Qwen2-VL | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_vl` | RMSNorm, LayerNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
316
|
+
| Qwen2-VL & QVQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_vl` | RMSNorm, LayerNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
295
317
|
| Phi3 & Phi3.5 | `liger_kernel.transformers.apply_liger_kernel_to_phi3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
318
|
+
| Granite 3.0 & 3.1 | `liger_kernel.transformers.apply_liger_kernel_to_granite` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss |
|
|
319
|
+
| OLMo2 | `liger_kernel.transformers.apply_liger_kernel_to_olmo2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
296
320
|
|
|
297
321
|
|
|
298
322
|
## Low-level APIs
|
|
@@ -321,6 +345,7 @@ loss.backward()
|
|
|
321
345
|
| Fused Linear DPO Loss | `liger_kernel.chunked_loss.LigerFusedLinearDPOLoss` |
|
|
322
346
|
| Fused Linear ORPO Loss | `liger_kernel.chunked_loss.LigerFusedLinearORPOLoss` |
|
|
323
347
|
| Fused Linear SimPO Loss | `liger_kernel.chunked_loss.LigerFusedLinearSimPOLoss` |
|
|
348
|
+
| Fused Linear KTO Loss | `liger_kernel.chunked_loss.LigerFusedLinearKTOLoss` |
|
|
324
349
|
|
|
325
350
|
### Distillation Kernels
|
|
326
351
|
|
|
@@ -329,6 +354,7 @@ loss.backward()
|
|
|
329
354
|
| KLDivergence | `liger_kernel.transformers.LigerKLDIVLoss` |
|
|
330
355
|
| JSD | `liger_kernel.transformers.LigerJSD` |
|
|
331
356
|
| Fused Linear JSD | `liger_kernel.transformers.LigerFusedLinearJSD` |
|
|
357
|
+
| TVD | `liger_kernel.transformers.LigerTVDLoss` |
|
|
332
358
|
|
|
333
359
|
### Experimental Kernels
|
|
334
360
|
|
|
@@ -340,16 +366,17 @@ loss.backward()
|
|
|
340
366
|
|
|
341
367
|
## Contributing, Acknowledgements, and License
|
|
342
368
|
|
|
343
|
-
- [Contributing Guidelines](https://github.com/linkedin/Liger-Kernel/blob/main/docs/
|
|
344
|
-
- [Acknowledgements](https://github.com/linkedin/Liger-Kernel/blob/main/docs/
|
|
345
|
-
- [License Information](https://github.com/linkedin/Liger-Kernel/blob/main/docs/
|
|
369
|
+
- [Contributing Guidelines](https://github.com/linkedin/Liger-Kernel/blob/main/docs/contributing.md)
|
|
370
|
+
- [Acknowledgements](https://github.com/linkedin/Liger-Kernel/blob/main/docs/acknowledgement.md)
|
|
371
|
+
- [License Information](https://github.com/linkedin/Liger-Kernel/blob/main/docs/license.md)
|
|
346
372
|
|
|
347
373
|
## Sponsorship and Collaboration
|
|
348
|
-
|
|
374
|
+
|
|
375
|
+
- [Glows.ai](https://platform.glows.ai/): Sponsoring NVIDIA GPUs for our open source developers.
|
|
349
376
|
- [AMD](https://www.amd.com/en.html): Providing AMD GPUs for our AMD CI.
|
|
350
377
|
- [Intel](https://www.intel.com/): Providing Intel GPUs for our Intel CI.
|
|
351
378
|
- [Modal](https://modal.com/): Free 3000 credits from GPU MODE IRL for our NVIDIA CI.
|
|
352
|
-
- [EmbeddedLLM](https://embeddedllm.com/): Making Liger Kernel run fast and stable on AMD.
|
|
379
|
+
- [EmbeddedLLM](https://embeddedllm.com/): Making Liger Kernel run fast and stable on AMD.
|
|
353
380
|
- [HuggingFace](https://huggingface.co/): Integrating Liger Kernel into Hugging Face Transformers and TRL.
|
|
354
381
|
- [Lightning AI](https://lightning.ai/): Integrating Liger Kernel into Lightning Thunder.
|
|
355
382
|
- [Axolotl](https://axolotl.ai/): Integrating Liger Kernel into Axolotl.
|
|
@@ -359,7 +386,7 @@ loss.backward()
|
|
|
359
386
|
|
|
360
387
|
- For issues, create a GitHub issue in this repository
|
|
361
388
|
- For open discussion, join [our discord channel](https://discord.gg/gpumode)
|
|
362
|
-
- For formal collaboration, send an email to
|
|
389
|
+
- For formal collaboration, send an email to yannchen@linkedin.com
|
|
363
390
|
|
|
364
391
|
## Cite this work
|
|
365
392
|
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
liger_kernel/env_report.py,sha256=uhdEC8OydxoZlb7B6YYcAaBF3crGFdIck-4cxaW4NJY,1728
|
|
3
|
+
liger_kernel/utils.py,sha256=FtVUkCGBT1UNasTl6HMNycWwiwHayK6tx-ZDdA-sNX4,1884
|
|
4
|
+
liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EBU1LpWU,2248
|
|
5
|
+
liger_kernel/chunked_loss/__init__.py,sha256=ATu-xX5Fc49Cr6yBOGBRNTo593ZrU5ZCsIuvoIbJWw4,603
|
|
6
|
+
liger_kernel/chunked_loss/cpo_loss.py,sha256=OdBR8WYdHTKpLI_c9DcuwqKSWPeAAeTyREz46Vu_cAY,3682
|
|
7
|
+
liger_kernel/chunked_loss/dpo_loss.py,sha256=wgjnwzLfrMUwV5mXgrq6G1YfQKWnbiFJegaP48BGJHY,4509
|
|
8
|
+
liger_kernel/chunked_loss/functional.py,sha256=THWWpCnRVhTVfnPnyvQjdBvo1JDtxhwLmtZE_yiBBqM,817
|
|
9
|
+
liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=5V8rdva89WyHVbmJ8JOmC4DYNOR6ByXfx3qlUieOZkI,11002
|
|
10
|
+
liger_kernel/chunked_loss/fused_linear_preference.py,sha256=idK9V9NivoVITqVpiG0fEGUHSvinYWkn9-EYXZjR-KQ,18356
|
|
11
|
+
liger_kernel/chunked_loss/fused_linear_rlhf.py,sha256=sAApL4GQ3YL2F-ymIAF61GCpFfBgFcWF5LB4Gzd7LgY,8044
|
|
12
|
+
liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=ZqYlXXhIphkJPxOS7iI70avgrr6x0skEtgpckZTYau0,9819
|
|
13
|
+
liger_kernel/chunked_loss/grpo_loss.py,sha256=M5qlQR-v5Rh8N3P3dPGNhOKygDFJ4516_rJaVPzU_-c,4980
|
|
14
|
+
liger_kernel/chunked_loss/jsd_loss.py,sha256=yRCQdvd3ruTWP4A_BfU8VcZ6LepSUfO0Ob7stGnueQY,6052
|
|
15
|
+
liger_kernel/chunked_loss/kto_loss.py,sha256=b3ffJyk97e-6XdXd4HFrYyx8wW4A-CU4gOaJSimKLtA,5476
|
|
16
|
+
liger_kernel/chunked_loss/orpo_loss.py,sha256=yjcrrbVeemLYodoSKT-FMSnaPtyKAZ3aOrvPD6tTY6Y,3617
|
|
17
|
+
liger_kernel/chunked_loss/simpo_loss.py,sha256=3TTc7U79Orjgi-Wu81WZkWk5MgsdqKXIOBHgIvDazPw,3865
|
|
18
|
+
liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
+
liger_kernel/ops/cross_entropy.py,sha256=D6vFFloiuxFXoWfjlIjmfO3tVaWOiYmztw9FKAi5vdU,18608
|
|
20
|
+
liger_kernel/ops/fused_linear_cross_entropy.py,sha256=1Y3Uk_TCSjqKgoG2eot1ptnWXJXXQESqGvOmqAW1gsM,10912
|
|
21
|
+
liger_kernel/ops/fused_linear_jsd.py,sha256=Seshez2qaM6HiTQ8_HEqSwhaeVruNT1SvIM4ZrAPBEU,9602
|
|
22
|
+
liger_kernel/ops/geglu.py,sha256=axGvCIvlBzuluoAIrWTsp2iZM4BFKNInkPov8YVvH9E,4126
|
|
23
|
+
liger_kernel/ops/group_norm.py,sha256=qD4D4lSjSgVtO52EBNLC2iTseALRgPgqXE50U2woggk,10837
|
|
24
|
+
liger_kernel/ops/jsd.py,sha256=0jNeRxpcNI5ckxCdoCNyO5GEedLIuzx3lz6KAiksc4o,6109
|
|
25
|
+
liger_kernel/ops/kl_div.py,sha256=MnfuYqqQESON1X2Swy064x1urKtMFdgeSWd60VttBXI,8420
|
|
26
|
+
liger_kernel/ops/layer_norm.py,sha256=6roQjioyg-9O2qLPV8nL4U0-5UH80tdzOMTWwjvDnn8,7961
|
|
27
|
+
liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
|
|
28
|
+
liger_kernel/ops/rms_norm.py,sha256=PWLJcdIKU5e-8BuYFHd9Cqlq6wmr6fUXKi9zQD4LetU,11727
|
|
29
|
+
liger_kernel/ops/rope.py,sha256=ofmBOkUpZZO-Q8Z5B_LOFYYLD-YT-8WnJ4vGOrDYouI,8943
|
|
30
|
+
liger_kernel/ops/swiglu.py,sha256=KmgMjaJQnbLLgZn2nEpbwHU_xpnYRweCyrLQSVvM1vA,3015
|
|
31
|
+
liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
|
|
32
|
+
liger_kernel/ops/utils.py,sha256=uoFKQqo-34N2TWQNvXMFywqGiOMMXNEVBxVojzlUAa0,3836
|
|
33
|
+
liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
|
|
34
|
+
liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
|
|
35
|
+
liger_kernel/transformers/__init__.py,sha256=6v_VcV1GQ9ISgNCd-ZxtmEg_s5GTBQ9F-s1KrFkYzPQ,2265
|
|
36
|
+
liger_kernel/transformers/auto_model.py,sha256=0qCTRZt280Bj_LcFdzo9hlaR-BWNazawXOGgoCZjgEg,1545
|
|
37
|
+
liger_kernel/transformers/cross_entropy.py,sha256=z3KTWQnFxr_IZaVjtYt0ZNEWQdDdYThN35xWkHlDGH0,1683
|
|
38
|
+
liger_kernel/transformers/functional.py,sha256=ShLD3eb--XKNtllznCrOYTbo4f-1KVwzi0KLMICdrn4,4942
|
|
39
|
+
liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=09Rt7FZzLH42VOcIbQ4dlQd0o3Rlb4vk6fqiOQ7WTD8,1778
|
|
40
|
+
liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
|
|
41
|
+
liger_kernel/transformers/geglu.py,sha256=mrgqzIUVd6lN7fkDKLkw5YaESDxDtFgbot430WwPVOQ,1107
|
|
42
|
+
liger_kernel/transformers/group_norm.py,sha256=6qMAWOprr4SzP0YhNVNGQIBpM5aUHplUD2VuGJrMBz0,2173
|
|
43
|
+
liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
|
|
44
|
+
liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
|
|
45
|
+
liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
|
|
46
|
+
liger_kernel/transformers/monkey_patch.py,sha256=g3i3q5McBg23A3Mnviw-Eb32le1hvN7jByzONa9ngcs,44000
|
|
47
|
+
liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
|
|
48
|
+
liger_kernel/transformers/rms_norm.py,sha256=GqCEJuGt0YdqqlMcToE0Wp4A8YFquDa4UUSyH2uFW2A,1191
|
|
49
|
+
liger_kernel/transformers/rope.py,sha256=ZTrTORSAyfcFIKjk6XEeYmk4ROH7xXED9L4g2NFntlE,999
|
|
50
|
+
liger_kernel/transformers/swiglu.py,sha256=i9WTqcNRqReU4XJs391IPbl-I5X0wG4T72D4pqGFfJg,2422
|
|
51
|
+
liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
|
|
52
|
+
liger_kernel/transformers/tvd.py,sha256=XrRfyJIqN6HFxXk8MYyFVZM1OLz3mtSbRZvWfZ_JerQ,450
|
|
53
|
+
liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1wcRgDSyjBMv5i1a7BrDPDQw,881
|
|
54
|
+
liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
+
liger_kernel/transformers/model/gemma.py,sha256=ky89b3aWPaeTGRMC-745KgixtQIRXzNAiCORAMLn9yo,9654
|
|
56
|
+
liger_kernel/transformers/model/gemma2.py,sha256=27NcoZjEqP9Lqb4Wf0EKqTbr2HTGiHPhrVyPCRGPz6s,10767
|
|
57
|
+
liger_kernel/transformers/model/llama.py,sha256=3LJFXKFDKvEakaWPc_NicSFst4Y_hdSMrdl1UDK1EcA,10330
|
|
58
|
+
liger_kernel/transformers/model/mistral.py,sha256=MVRksI5_j_8WJu8znOHKCdSI5jSu-S7cdFYzt9m_vIQ,5180
|
|
59
|
+
liger_kernel/transformers/model/mixtral.py,sha256=jpZJkpl625Q-JHWarj2MqT5mRaSsiCtg0c9vVyvOdCY,11430
|
|
60
|
+
liger_kernel/transformers/model/mllama.py,sha256=qWexBdskuN3gPJvPUwt4J0nU675tGD6W7wxgRZ9Bifg,11145
|
|
61
|
+
liger_kernel/transformers/model/olmo2.py,sha256=yyksS6E4fuWd8asEW8rEDBKqZpFmP4ITCM_bjIDZaoY,5124
|
|
62
|
+
liger_kernel/transformers/model/phi3.py,sha256=biRa8fph9qdnQmkD9I21t5XIjpIt1i6UKU4uk8Up8pU,10292
|
|
63
|
+
liger_kernel/transformers/model/qwen2.py,sha256=14UuPjxB-tjqWn85Tn4fqBFvVhVsth5iPEt8kJSMiew,9581
|
|
64
|
+
liger_kernel/transformers/model/qwen2_vl.py,sha256=yMLqsfSYcvhClUpTUjGoADiOxfLB2B8240VdrPP0c8s,9851
|
|
65
|
+
liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
|
|
66
|
+
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=pdekW7l6Qg_aqa5SYKYlSWUF8m3lkOFvFLcIMEHrz9s,8338
|
|
67
|
+
liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
|
|
68
|
+
liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
|
|
69
|
+
liger_kernel-0.5.4.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
70
|
+
liger_kernel-0.5.4.dist-info/METADATA,sha256=Zw7n3Ey6vUed4E54H9-TzKmhuOpd9P2ZFMVL-zYUnew,22255
|
|
71
|
+
liger_kernel-0.5.4.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
72
|
+
liger_kernel-0.5.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
73
|
+
liger_kernel-0.5.4.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
74
|
+
liger_kernel-0.5.4.dist-info/RECORD,,
|
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
liger_kernel/env_report.py,sha256=FViyPju795lB6z4k2TZldvBSmQdcS0A2hcnDxepJrDo,1822
|
|
3
|
-
liger_kernel/utils.py,sha256=HJa-xVKOohDn6pLVIx-Fv0V9h0QAL3qZGQNRICI-OpI,249
|
|
4
|
-
liger_kernel/chunked_loss/__init__.py,sha256=R2wCcz4Y0kTAve926DH3k182XKezpXeACMHj05g9Mm8,346
|
|
5
|
-
liger_kernel/chunked_loss/cpo_loss.py,sha256=Qu1Ul2A12sp6CqIT-atPbHWFb_LLtINEA9mOpIRx_0g,3097
|
|
6
|
-
liger_kernel/chunked_loss/dpo_loss.py,sha256=H9_RRhclckHYM2sd75tgbnf8IxC_PU2JCALbgtPQvwc,4222
|
|
7
|
-
liger_kernel/chunked_loss/functional.py,sha256=9Gr-YXIuEzEJkBUhDx3G2fuQayckLor7cC7svhmPML4,549
|
|
8
|
-
liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=2BH6DCPjsR2zS6zcwFPcIIZRhLF8SohjGdKsAJ_301o,10222
|
|
9
|
-
liger_kernel/chunked_loss/fused_linear_preference.py,sha256=vlWfaaIECWvCQhY9PM7zRI0vKThIrydMf6P44bXn1EE,15114
|
|
10
|
-
liger_kernel/chunked_loss/orpo_loss.py,sha256=ZuKGjbkIYzV4UzvupNdq6vyxCp7-BztQkUt8ZnFvKos,3531
|
|
11
|
-
liger_kernel/chunked_loss/simpo_loss.py,sha256=Wa4LOlDG9PbJkOOkKg8hbKvnKgg7OTBz6-qIkwPK1yw,3275
|
|
12
|
-
liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
-
liger_kernel/ops/cross_entropy.py,sha256=oG5hfrlmnlF5lOoZRhHRglObxgH4B0KadjWMJj9EWPM,15860
|
|
14
|
-
liger_kernel/ops/fused_linear_cross_entropy.py,sha256=Tnw4gyAYVVdnCOqhOuLEzbUQ3goOTnoAfk3pqSIM5ac,9301
|
|
15
|
-
liger_kernel/ops/fused_linear_jsd.py,sha256=nOv4zwfxHqqepKEmMsQuz-B3H-gRjyo8uClpmqSGLYA,9693
|
|
16
|
-
liger_kernel/ops/geglu.py,sha256=MQL4zyzneZqZYUGPvb1QjI_EYT9_pKfSDgR25WD9jrI,4127
|
|
17
|
-
liger_kernel/ops/group_norm.py,sha256=VaRErVJGR4JqgXXvuIjNGTn3E2egjLtU1y3ymwIf4d8,10961
|
|
18
|
-
liger_kernel/ops/jsd.py,sha256=Ap2b0_geCl6fqBXLI1IS6Yn6GlO-8LgPmnOW3y47dus,6151
|
|
19
|
-
liger_kernel/ops/kl_div.py,sha256=03FNXfvCb6M-56hhFepAFV9p6brArPR6KOKkdGD34mw,8374
|
|
20
|
-
liger_kernel/ops/layer_norm.py,sha256=_CZggw3GNEIUx5weDzadFit5I-Lzosoo8prgeJzcViY,7589
|
|
21
|
-
liger_kernel/ops/qwen2vl_mrope.py,sha256=GvP4Cg-2ClYyiqbe7bB_OMvnlZooBmqP2-9V8RMPde4,8598
|
|
22
|
-
liger_kernel/ops/rms_norm.py,sha256=g7OXwuYI8-LXudDwvXuiupVjjOsbu8c4wwv83VaHa54,11750
|
|
23
|
-
liger_kernel/ops/rope.py,sha256=jrzaA9-6Orn44y_IIam9_YNPQxOFK2FrIRNfFea4EtU,8513
|
|
24
|
-
liger_kernel/ops/swiglu.py,sha256=Fwxtd76rhHKT9ShQAGca9RsnASplAVxtYKHmiT73_yA,2994
|
|
25
|
-
liger_kernel/ops/utils.py,sha256=_VQvd1PX5JXm5xaiBrk2gANp3qr4kM7qYG3ypkBwkMs,3850
|
|
26
|
-
liger_kernel/ops/experimental/embedding.py,sha256=LYR66dB-jhvhtUjeV4PnNro-n77J1mdlmpSLSxB3Y6U,4186
|
|
27
|
-
liger_kernel/ops/experimental/mm_int8int2.py,sha256=JpGVZCgRC6T8XMUJ_QbZRS2XU1bh0urIZphs5DTc1mY,13358
|
|
28
|
-
liger_kernel/transformers/__init__.py,sha256=gia-eBxr7TLxU0GdDf8AfCY4WgDlFLqIGSt7EoQGsBA,1336
|
|
29
|
-
liger_kernel/transformers/auto_model.py,sha256=RMIwQHSiXoksXFTIqFZ4PLBgoqkxJJAT3q1Qh47bGN8,1552
|
|
30
|
-
liger_kernel/transformers/cross_entropy.py,sha256=yEm_YQ7oa3_BzT3hdW6KrAslduhSqWcJQVNZZDcWCg4,1758
|
|
31
|
-
liger_kernel/transformers/functional.py,sha256=sUBoU8Vb4pLpr9G6IdkRsToYgh-rCXL4OLYat7Tv_GU,4450
|
|
32
|
-
liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=_i0PXSp5iZ9pKXdEeZ4lvHCENJYjV4y74yz3ZRG5XQg,1484
|
|
33
|
-
liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
|
|
34
|
-
liger_kernel/transformers/geglu.py,sha256=QcrME_8ooIn0xa59LaC0aoOdRrBIFd11Y0bAyF0NfCw,1130
|
|
35
|
-
liger_kernel/transformers/group_norm.py,sha256=FJ9R7mS9G1wO-GRIQ6QKSmIhnZ6nQ6GIkE4NnX_hnn0,2241
|
|
36
|
-
liger_kernel/transformers/jsd.py,sha256=sbr8DnKSYZJH9pv2rpmboNijYGpZKbhb2-WSGp5_v6g,3001
|
|
37
|
-
liger_kernel/transformers/kl_div.py,sha256=qVhjBg6tjRyue5iZ3NFxo8uySY4JuIFJyv0IM_50F24,431
|
|
38
|
-
liger_kernel/transformers/layer_norm.py,sha256=fd6o4kSHJWolQMWxh-l1qObfgL08ruNbUoBiANKX1ow,972
|
|
39
|
-
liger_kernel/transformers/monkey_patch.py,sha256=Fk2v4GZQDJzfh3Cpc6BHNJbs_tungDyWmqS9nuG9Lc4,38406
|
|
40
|
-
liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
|
|
41
|
-
liger_kernel/transformers/rms_norm.py,sha256=AHstklNIO1PLHjjCBU-TPuUD-Fl_pycJUTLlJNojbV8,1189
|
|
42
|
-
liger_kernel/transformers/rope.py,sha256=m-ah8vZBYW8tfplTXCiAPMHJWlB1tdp_JPXJeWE-Boo,943
|
|
43
|
-
liger_kernel/transformers/swiglu.py,sha256=0-tVJ8xEYfhxnduc16PflXFj8sZPxdx9sHUn3hfwCI4,2468
|
|
44
|
-
liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
|
|
45
|
-
liger_kernel/transformers/experimental/embedding.py,sha256=HpckiAMKM8-SRxKDcGTqortVxnjhwpZsfsp9lfjqfeM,895
|
|
46
|
-
liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
|
-
liger_kernel/transformers/model/gemma.py,sha256=R4huxuR48gkLrdT8KqV7As2v9dZtEmcGVz6YG1ZmuJE,9692
|
|
48
|
-
liger_kernel/transformers/model/gemma2.py,sha256=zxQsxCRqkoxCES3GJPVI7soUuF3J5HZDlvJgaBos1zM,10836
|
|
49
|
-
liger_kernel/transformers/model/llama.py,sha256=RinsgC_eR-YNvZd2SHPQxZ4eyR3uViaTFCM3SvI5nks,10426
|
|
50
|
-
liger_kernel/transformers/model/mistral.py,sha256=XpL1rlWg_llvW3z_Hf_d8WQs7uQaH4ds7EZ2SxjQHsU,5144
|
|
51
|
-
liger_kernel/transformers/model/mixtral.py,sha256=nyDS1dBpsOXYC2DuW59Hgu7ZrGftrHuWPfNqjcNPIxs,11503
|
|
52
|
-
liger_kernel/transformers/model/mllama.py,sha256=mesNCgj0Ea1O-fqRD4LVxDJ1CR2abY_zAzK_bfVzkiU,11222
|
|
53
|
-
liger_kernel/transformers/model/phi3.py,sha256=xUZPlaPKwknLjHc3uUW3EPodm1h0vD3G7Qnhh51v-Io,10332
|
|
54
|
-
liger_kernel/transformers/model/qwen2.py,sha256=EyhSSzQOskGjSnCsKMZpd1s5IAIlHd5PBO3q0MoCs00,9619
|
|
55
|
-
liger_kernel/transformers/model/qwen2_vl.py,sha256=bIQe2bWiY--G84FhCD29Gdi64_qHP6vbcGsK6vKysQE,8547
|
|
56
|
-
liger_kernel/transformers/trainer/__init__.py,sha256=c4OQVJmhNOloj0JYSEc0j_cQuBbzGWILfaowUR1hmRw,210
|
|
57
|
-
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=jko6oq_XQdBSmXubp05E-_YXOyhtB5Bj75dg5YNwOsE,7517
|
|
58
|
-
liger_kernel/triton/__init__.py,sha256=yfRe0zMb47QnqjecZWG7LnanfCTzeku7SgWRAwNVmzU,101
|
|
59
|
-
liger_kernel/triton/monkey_patch.py,sha256=5BcGKTtdqeYchypBIBopGIWPx1-cFALz7sOKoEsqXJ0,1584
|
|
60
|
-
liger_kernel-0.5.2.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
61
|
-
liger_kernel-0.5.2.dist-info/METADATA,sha256=olSIT-Jd2Mowu2ja4QLwyPYBhCnY22znBq9pV7stkKI,20695
|
|
62
|
-
liger_kernel-0.5.2.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
63
|
-
liger_kernel-0.5.2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
64
|
-
liger_kernel-0.5.2.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
65
|
-
liger_kernel-0.5.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|