liger-kernel 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. liger_kernel/chunked_loss/README.md +25 -0
  2. liger_kernel/chunked_loss/__init__.py +3 -0
  3. liger_kernel/chunked_loss/cpo_loss.py +18 -8
  4. liger_kernel/chunked_loss/dpo_loss.py +20 -10
  5. liger_kernel/chunked_loss/functional.py +4 -0
  6. liger_kernel/chunked_loss/fused_linear_distillation.py +58 -44
  7. liger_kernel/chunked_loss/fused_linear_preference.py +108 -60
  8. liger_kernel/chunked_loss/fused_linear_rlhf.py +213 -0
  9. liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +246 -0
  10. liger_kernel/chunked_loss/grpo_loss.py +160 -0
  11. liger_kernel/chunked_loss/jsd_loss.py +154 -0
  12. liger_kernel/chunked_loss/kto_loss.py +172 -0
  13. liger_kernel/chunked_loss/orpo_loss.py +8 -9
  14. liger_kernel/chunked_loss/simpo_loss.py +22 -8
  15. liger_kernel/env_report.py +5 -12
  16. liger_kernel/ops/cross_entropy.py +102 -51
  17. liger_kernel/ops/experimental/embedding.py +1 -3
  18. liger_kernel/ops/experimental/mm_int8int2.py +3 -9
  19. liger_kernel/ops/fused_linear_cross_entropy.py +89 -55
  20. liger_kernel/ops/fused_linear_jsd.py +14 -32
  21. liger_kernel/ops/geglu.py +6 -17
  22. liger_kernel/ops/group_norm.py +11 -28
  23. liger_kernel/ops/jsd.py +5 -9
  24. liger_kernel/ops/kl_div.py +8 -11
  25. liger_kernel/ops/layer_norm.py +23 -12
  26. liger_kernel/ops/qwen2vl_mrope.py +8 -25
  27. liger_kernel/ops/rms_norm.py +14 -32
  28. liger_kernel/ops/rope.py +31 -33
  29. liger_kernel/ops/swiglu.py +4 -8
  30. liger_kernel/ops/tvd.py +207 -0
  31. liger_kernel/ops/utils.py +3 -2
  32. liger_kernel/transformers/__init__.py +19 -24
  33. liger_kernel/transformers/auto_model.py +6 -13
  34. liger_kernel/transformers/cross_entropy.py +7 -9
  35. liger_kernel/transformers/experimental/embedding.py +1 -3
  36. liger_kernel/transformers/functional.py +28 -7
  37. liger_kernel/transformers/fused_linear_cross_entropy.py +15 -10
  38. liger_kernel/transformers/geglu.py +1 -4
  39. liger_kernel/transformers/group_norm.py +9 -15
  40. liger_kernel/transformers/jsd.py +1 -3
  41. liger_kernel/transformers/kl_div.py +1 -3
  42. liger_kernel/transformers/layer_norm.py +3 -9
  43. liger_kernel/transformers/model/gemma.py +18 -40
  44. liger_kernel/transformers/model/gemma2.py +19 -41
  45. liger_kernel/transformers/model/llama.py +22 -48
  46. liger_kernel/transformers/model/mistral.py +14 -26
  47. liger_kernel/transformers/model/mixtral.py +24 -54
  48. liger_kernel/transformers/model/mllama.py +16 -36
  49. liger_kernel/transformers/model/olmo2.py +124 -0
  50. liger_kernel/transformers/model/phi3.py +18 -40
  51. liger_kernel/transformers/model/qwen2.py +18 -40
  52. liger_kernel/transformers/model/qwen2_vl.py +36 -32
  53. liger_kernel/transformers/monkey_patch.py +214 -144
  54. liger_kernel/transformers/rms_norm.py +4 -4
  55. liger_kernel/transformers/rope.py +2 -2
  56. liger_kernel/transformers/swiglu.py +2 -8
  57. liger_kernel/transformers/trainer/__init__.py +1 -3
  58. liger_kernel/transformers/trainer/orpo_trainer.py +31 -18
  59. liger_kernel/transformers/tvd.py +13 -0
  60. liger_kernel/triton/__init__.py +1 -3
  61. liger_kernel/triton/monkey_patch.py +1 -3
  62. liger_kernel/utils.py +49 -0
  63. {liger_kernel-0.5.2.dist-info → liger_kernel-0.5.4.dist-info}/METADATA +53 -26
  64. liger_kernel-0.5.4.dist-info/RECORD +74 -0
  65. {liger_kernel-0.5.2.dist-info → liger_kernel-0.5.4.dist-info}/WHEEL +1 -1
  66. liger_kernel-0.5.2.dist-info/RECORD +0 -65
  67. {liger_kernel-0.5.2.dist-info → liger_kernel-0.5.4.dist-info}/LICENSE +0 -0
  68. {liger_kernel-0.5.2.dist-info → liger_kernel-0.5.4.dist-info}/NOTICE +0 -0
  69. {liger_kernel-0.5.2.dist-info → liger_kernel-0.5.4.dist-info}/top_level.txt +0 -0
liger_kernel/transformers/trainer/orpo_trainer.py CHANGED
@@ -1,7 +1,14 @@
- from typing import Any, Callable, Dict, List, Literal, Tuple, Union
+ from typing import Any
+ from typing import Callable
+ from typing import Dict
+ from typing import List
+ from typing import Literal
+ from typing import Tuple
+ from typing import Union

  import torch
  import torch.nn as nn
+
  from torch.distributed.fsdp import FullyShardedDataParallel
  from trl.trainer import ORPOTrainer

@@ -17,7 +24,7 @@ class _FSDPForwardRedirection:
      This is needed in cases where we call a submodule of a FSDP module. For instance, when we want to call only
      the `LlamaModel` part out of a FSDP-wrapped `LlamaForCausalLM` to get the hidden states without involving
      GPU-memory-heavy `lm_head` and cross entropy computation, doing this directly (i.e. `model.model.forward()`)
-     will not work because the first `nn.Emebedding` layer is not independently wrapped as a FSDP module (because of
+     will not work because the first `nn.Embedding` layer is not independently wrapped as a FSDP module (because of
      the transformer-based wrapping policy), and not calling it through FSDP root module forward will not all-gather
      its parameter, thus resulting in "RuntimeError: 'weight' must be 2-D" error. Similarly, if we want to call just
      the `lm_head` part of a model, we need this trick too to properly get its params all-gathered.
@@ -62,9 +69,7 @@ class _FSDPForwardRedirection:
  class LigerORPOTrainer(ORPOTrainer):
      def concatenated_forward(
          self, model: nn.Module, batch: Dict[str, Union[List, torch.LongTensor]]
-     ) -> Tuple[
-         torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor
-     ]:
+     ) -> Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]:
          """
          Run the given model on the given batch of inputs, concatenating the chosen and rejected inputs together.
          We do this to avoid doing two forward passes, because it's faster for FSDP.
@@ -79,9 +84,7 @@ class LigerORPOTrainer(ORPOTrainer):

          model_kwargs = (
              {
-                 "decoder_input_ids": self._shift_right(
-                     concatenated_batch["concatenated_labels"]
-                 ),
+                 "decoder_input_ids": self._shift_right(concatenated_batch["concatenated_labels"]),
              }
              if self.is_encoder_decoder
              else {}
@@ -90,6 +93,13 @@ class LigerORPOTrainer(ORPOTrainer):
          if self.aux_loss_enabled:
              model_kwargs["output_router_logits"] = True

+         if self.is_encoder_decoder:
+             labels = concatenated_batch["concatenated_labels"].clone()
+         else:
+             labels = concatenated_batch["concatenated_input_ids"].clone()
+             attention_mask = concatenated_batch["concatenated_attention_mask"]
+             labels = torch.where(attention_mask == 1, labels, self.label_pad_token_id)
+
          if isinstance(model, FullyShardedDataParallel):
              outputs = _FSDPForwardRedirection()(
                  model,
@@ -109,22 +119,27 @@ class LigerORPOTrainer(ORPOTrainer):
              **model_kwargs,
          )

-         orpo_loss_fn = LigerFusedLinearORPOLoss(
-             ignore_index=self.label_pad_token_id, beta=self.beta
-         )
+         orpo_loss_fn = LigerFusedLinearORPOLoss(ignore_index=self.label_pad_token_id, beta=self.beta)

-         def orpo_partial(lm_head, last_hidden_state, concatenated_labels):
+         def orpo_partial(lm_head, last_hidden_state, concatenated_labels, nll_target):
              return orpo_loss_fn(
-                 lm_head.weight, last_hidden_state, concatenated_labels, lm_head.bias
+                 lm_head.weight, last_hidden_state, concatenated_labels, lm_head.bias, nll_target=nll_target
              )

          orpo_loss, aux_outputs = _FSDPForwardRedirection()(
              model,
              orpo_partial,
              model.lm_head,
-             outputs.last_hidden_state,
-             concatenated_batch["concatenated_labels"],
+             outputs.last_hidden_state[:, :-1] if not self.is_encoder_decoder else outputs.last_hidden_state,
+             concatenated_batch["concatenated_labels"][:, 1:]
+             if not self.is_encoder_decoder
+             else concatenated_batch["concatenated_labels"],
+             labels[:, 1:] if not self.is_encoder_decoder else labels,
          )
+         # if aux_loss_enabled, add the aux_loss to the orpo_loss
+         if self.aux_loss_enabled:
+             orpo_loss += self.aux_loss_coef * outputs.aux_loss
+
          return orpo_loss, aux_outputs

      def get_batch_loss_metrics(
@@ -145,9 +160,7 @@ class LigerORPOTrainer(ORPOTrainer):
          ) = aux_outputs[:5]

          # return loss, metrics
-         chosen_rewards, rejected_rewards, log_odds_ratio, log_odds_chosen = aux_outputs[
-             5:
-         ]
+         chosen_rewards, rejected_rewards, log_odds_ratio, log_odds_chosen = aux_outputs[5:]

          reward_accuracies = (chosen_rewards > rejected_rewards).float()

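The reworked `concatenated_forward` keeps `LigerORPOTrainer` a drop-in replacement for trl's `ORPOTrainer`: it now shifts hidden states and labels for decoder-only models, threads an `nll_target` through to `LigerFusedLinearORPOLoss`, and folds the router `aux_loss` back into the ORPO loss. A minimal usage sketch, not part of the diff; the model path and toy dataset are placeholders, and newer trl versions take `processing_class=` instead of `tokenizer=`:

```python
# Hedged sketch: LigerORPOTrainer mirrors trl's ORPOTrainer construction API.
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import ORPOConfig

from liger_kernel.transformers.trainer.orpo_trainer import LigerORPOTrainer

model = AutoModelForCausalLM.from_pretrained("path/to/model")  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained("path/to/model")

# Toy preference data; real training needs a proper prompt/chosen/rejected dataset.
train_dataset = Dataset.from_dict(
    {"prompt": ["What is 2+2?"], "chosen": ["4"], "rejected": ["5"]}
)

trainer = LigerORPOTrainer(
    model=model,
    args=ORPOConfig(output_dir="orpo-out", beta=0.1),
    train_dataset=train_dataset,
    tokenizer=tokenizer,  # newer trl versions: processing_class=tokenizer
)
trainer.train()
```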
liger_kernel/transformers/tvd.py ADDED
@@ -0,0 +1,13 @@
+ import torch.nn as nn
+
+ from liger_kernel.ops.tvd import LigerTVDLossFunction
+
+
+ class LigerTVDLoss(nn.Module):
+     def __init__(self, reduction="batchmean", ignore_index: int = -100):
+         super(LigerTVDLoss, self).__init__()
+         self.reduction = reduction
+         self.ignore_index = ignore_index
+
+     def forward(self, p, q, shift_labels=None):
+         return LigerTVDLossFunction.apply(p, q, shift_labels, self.reduction, self.ignore_index)
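`LigerTVDLoss` is a thin `nn.Module` wrapper around the new Triton total variation distance kernel (`liger_kernel/ops/tvd.py`, entry 30 in the file list). A hedged sketch of how it might be called; the shapes and the CUDA device are assumptions, not from the diff:

```python
import torch

from liger_kernel.transformers.tvd import LigerTVDLoss

device = "cuda"  # assumption: the Triton kernel runs on an accelerator
tvd = LigerTVDLoss(reduction="batchmean", ignore_index=-100)

# Assumed shapes: 8 tokens over a 128-entry vocabulary, rows already softmaxed.
p = torch.randn(8, 128, device=device).softmax(dim=-1).requires_grad_(True)  # e.g. student
q = torch.randn(8, 128, device=device).softmax(dim=-1)                       # e.g. teacher

loss = tvd(p, q)  # TVD = 0.5 * sum(|p - q|), reduced with "batchmean"
loss.backward()
```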
liger_kernel/triton/__init__.py CHANGED
@@ -1,3 +1 @@
- from liger_kernel.triton.monkey_patch import (  # noqa: F401
-     apply_liger_triton_cache_manager,
- )
+ from liger_kernel.triton.monkey_patch import apply_liger_triton_cache_manager  # noqa: F401
liger_kernel/triton/monkey_patch.py CHANGED
@@ -37,6 +37,4 @@ def apply_liger_triton_cache_manager():
      Experimental feature to get around transient FileNotFoundError in triton compilation.
      For more details please see https://github.com/triton-lang/triton/pull/4295
      """
-     os.environ["TRITON_CACHE_MANAGER"] = (
-         "liger_kernel.triton.monkey_patch:LigerTritonFileCacheManager"
-     )
+     os.environ["TRITON_CACHE_MANAGER"] = "liger_kernel.triton.monkey_patch:LigerTritonFileCacheManager"
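Since Triton reads `TRITON_CACHE_MANAGER` when kernels are compiled, the patch has to run before the first Liger kernel launch. A one-line sketch, assuming it is called at program start:

```python
# Opt in to the experimental cache manager before any Triton kernel compiles.
from liger_kernel.triton import apply_liger_triton_cache_manager

apply_liger_triton_cache_manager()  # sets TRITON_CACHE_MANAGER for this process
```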
liger_kernel/utils.py CHANGED
@@ -9,5 +9,54 @@ def infer_device():
          return "cuda"
      elif torch.xpu.is_available():
          return "xpu"
+     elif torch.hip.is_available():
+         return "hip"
      else:
          return "cpu"
+
+
+ def transformers_version_dispatch(
+     required_version: str,
+     before_fn,
+     after_fn,
+     before_args: tuple = (),
+     after_args: tuple = (),
+     before_kwargs: dict = None,
+     after_kwargs: dict = None,
+ ):
+     """
+     Dispatches to different functions based on package version comparison.
+
+     Args:
+         required_version: Version to compare against (e.g. "4.48.0")
+         before_fn: Function to call if package_version < required_version
+         after_fn: Function to call if package_version >= required_version
+         before_args: Positional arguments for before_fn
+         after_args: Positional arguments for after_fn
+         before_kwargs: Keyword arguments for before_fn
+         after_kwargs: Keyword arguments for after_fn
+
+     Returns:
+         Result from either before_fn or after_fn
+
+     Example:
+         >>> rotary_emb = transformers_version_dispatch(
+         ...     "4.48.0",
+         ...     LlamaRotaryEmbedding,
+         ...     LlamaRotaryEmbedding,
+         ...     before_args=(head_dim,),
+         ...     after_args=(LlamaConfig(head_dim=head_dim),),
+         ...     before_kwargs={'device': device},
+         ...     after_kwargs={'device': device}
+         ... )
+     """
+     from packaging import version
+     from transformers import __version__ as transformers_version
+
+     before_kwargs = before_kwargs or {}
+     after_kwargs = after_kwargs or {}
+
+     if version.parse(transformers_version) < version.parse(required_version):
+         return before_fn(*before_args, **before_kwargs)
+     else:
+         return after_fn(*after_args, **after_kwargs)
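Together with the new `hip` branch in `infer_device`, the `transformers_version_dispatch` helper lets call sites stay backend- and version-agnostic. A short sketch of `infer_device` in practice; the tensor shape is illustrative:

```python
import torch

from liger_kernel.utils import infer_device

device = infer_device()               # "cuda", "xpu", "hip", or "cpu"
x = torch.randn(4, 4, device=device)  # same call site works on every backend
```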
{liger_kernel-0.5.2.dist-info → liger_kernel-0.5.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: liger_kernel
- Version: 0.5.2
+ Version: 0.5.4
  Summary: Efficient Triton kernels for LLM Training
  License: BSD 2-CLAUSE LICENSE
  Copyright 2024 LinkedIn Corporation
@@ -32,10 +32,6 @@ License-File: LICENSE
  License-File: NOTICE
  Requires-Dist: torch>=2.1.2
  Requires-Dist: triton>=2.3.1
- Provides-Extra: transformers
- Requires-Dist: transformers~=4.0; extra == "transformers"
- Provides-Extra: trl
- Requires-Dist: trl>=0.11.0; extra == "trl"
  Provides-Extra: dev
  Requires-Dist: transformers>=4.44.2; extra == "dev"
  Requires-Dist: matplotlib>=3.7.2; extra == "dev"
@@ -46,13 +42,11 @@ Requires-Dist: pytest>=7.1.2; extra == "dev"
  Requires-Dist: pytest-xdist; extra == "dev"
  Requires-Dist: pytest-rerunfailures; extra == "dev"
  Requires-Dist: datasets>=2.19.2; extra == "dev"
- Requires-Dist: torchvision>=0.16.2; extra == "dev"
  Requires-Dist: seaborn; extra == "dev"
- Provides-Extra: amd
- Requires-Dist: torch>=2.6.0.dev; extra == "amd"
- Requires-Dist: setuptools-scm>=8; extra == "amd"
- Requires-Dist: torchvision>=0.20.0.dev; extra == "amd"
- Requires-Dist: triton>=3.0.0; extra == "amd"
+ Requires-Dist: mkdocs; extra == "dev"
+ Requires-Dist: mkdocs-material; extra == "dev"
+ Dynamic: provides-extra
+ Dynamic: requires-dist

  <a name="readme-top"></a>

@@ -103,6 +97,11 @@ Requires-Dist: triton>=3.0.0; extra == "amd"
      <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?event=schedule" alt="Build">
    </a>
  </div>
+ <div style="display: block;">
+   <a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
+     <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?event=schedule" alt="Build">
+   </a>
+ </div>
  </td>
  </tr>
  </table>
@@ -116,7 +115,8 @@ Requires-Dist: triton>=3.0.0; extra == "amd"
  <details>
  <summary>Latest News 🔥</summary>

- - [2024/12/15] We release LinkedIn Engineering Blog - [Liger-Kernel: Empowering an open source ecosystem of Triton Kernels for Efficient LLM Training](https://www.linkedin.com/blog/engineering/open-source/liger-kernel-open-source-ecosystem-for-efficient-llm-training)
+ - [2024/12/11] We release [v0.5.0](https://github.com/linkedin/Liger-Kernel/releases/tag/v0.5.0): 80% more memory efficient post training losses (DPO, ORPO, CPO, etc)!
+ - [2024/12/5] We release LinkedIn Engineering Blog - [Liger-Kernel: Empowering an open source ecosystem of Triton Kernels for Efficient LLM Training](https://www.linkedin.com/blog/engineering/open-source/liger-kernel-open-source-ecosystem-for-efficient-llm-training)
  - [2024/11/6] We release [v0.4.0](https://github.com/linkedin/Liger-Kernel/releases/tag/v0.4.0): Full AMD support, Tech Report, Modal CI, Llama-3.2-Vision!
  - [2024/10/21] We have released the tech report of Liger Kernel on Arxiv: https://arxiv.org/pdf/2410.10989
  - [2024/9/6] We release v0.2.1 ([X post](https://x.com/liger_kernel/status/1832168197002510649)). 2500+ Stars, 10+ New Contributors, 50+ PRs, 50k Downloads in two weeks!
@@ -128,7 +128,7 @@ Requires-Dist: triton>=3.0.0; extra == "amd"

  **Liger Kernel** is a collection of Triton kernels designed specifically for LLM training. It can effectively increase multi-GPU **training throughput by 20%** and reduces **memory usage by 60%**. We have implemented **Hugging Face Compatible** `RMSNorm`, `RoPE`, `SwiGLU`, `CrossEntropy`, `FusedLinearCrossEntropy`, and more to come. The kernel works out of the box with [Flash Attention](https://github.com/Dao-AILab/flash-attention), [PyTorch FSDP](https://pytorch.org/tutorials/intermediate/FSDP_tutorial.html), and [Microsoft DeepSpeed](https://github.com/microsoft/DeepSpeed). We welcome contributions from the community to gather the best kernels for LLM training.

- We've also added optimized Post-Training kernels that deliver **up to 80% memory savings** for alignment and distillation tasks. We support losses like DPO, CPO, ORPO, SimPO, JSD, and many more.
+ We've also added optimized Post-Training kernels that deliver **up to 80% memory savings** for alignment and distillation tasks. We support losses like DPO, CPO, ORPO, SimPO, KTO, JSD, and many more. Check out [how we optimize the memory](https://x.com/hsu_byron/status/1866577403918917655).

  ## Supercharge Your Model with Liger Kernel

@@ -145,6 +145,21 @@ With one line of code, Liger Kernel can increase throughput by more than 20% and
  > - Benchmark conditions: LLaMA 3-8B, Batch Size = 8, Data Type = `bf16`, Optimizer = AdamW, Gradient Checkpointing = True, Distributed Strategy = FSDP1 on 8 A100s.
  > - Hugging Face models start to OOM at a 4K context length, whereas Hugging Face + Liger Kernel scales up to 16K.

+ ## Optimize Post Training with Liger Kernel
+
+ <p align="center">
+   <img src="https://raw.githubusercontent.com/linkedin/Liger-Kernel/main/docs/images/post-training.png" width="50%" alt="Post Training">
+ </p>
+
+ We provide optimized post training kernels like DPO, ORPO, SimPO, and more which can reduce memory usage by up to 80%. You can easily use them as python modules.
+
+ ```python
+ from liger_kernel.chunked_loss import LigerFusedLinearDPOLoss
+ orpo_loss = LigerFusedLinearORPOLoss()
+ y = orpo_loss(lm_head.weight, x, target)
+ ```
+
+
  ## Examples

  | **Use Case** | **Description** |
@@ -178,6 +193,11 @@ With one line of code, Liger Kernel can increase throughput by more than 20% and
  - `torch >= 2.5.0` Install according to the instruction in Pytorch official webpage.
  - `triton >= 3.0.0` Install from pypi. (e.g. `pip install triton==3.0.0`)

+ ```bash
+ # Need to pass the url when installing
+ pip install -e .[dev] --extra-index-url https://download.pytorch.org/whl/nightly/rocm6.2
+ ```
+
  ### Optional Dependencies

  - `transformers >= 4.x`: Required if you plan to use the transformers models patching APIs. The specific model you are working will dictate the minimum version of transformers.
@@ -202,11 +222,13 @@ To install from source:
  ```bash
  git clone https://github.com/linkedin/Liger-Kernel.git
  cd Liger-Kernel
+
+ # Install Default Dependencies
+ # Setup.py will detect whether you are using AMD or NVIDIA
  pip install -e .
- # or if installing on amd platform
- pip install -e .[amd] --extra-index-url https://download.pytorch.org/whl/nightly/rocm6.2 # rocm6.2
- # or if using transformers
- pip install -e .[transformers]
+
+ # Setup Development Dependencies
+ pip install -e ".[dev]"
  ```


@@ -252,7 +274,7 @@ model = transformers.AutoModelForCausalLM("path/to/llama/model")

  ### 3. Compose Your Own Model

- You can take individual [kernels](#kernels) to compose your models.
+ You can take individual [kernels](https://github.com/linkedin/Liger-Kernel?tab=readme-ov-file#model-kernels) to compose your models.

  ```python
  from liger_kernel.transformers import LigerFusedLinearCrossEntropyLoss
@@ -291,8 +313,10 @@ loss.backward()
  | Gemma1 | `liger_kernel.transformers.apply_liger_kernel_to_gemma` | RoPE, RMSNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
  | Gemma2 | `liger_kernel.transformers.apply_liger_kernel_to_gemma2` | RoPE, RMSNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
  | Qwen2, Qwen2.5, & QwQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
- | Qwen2-VL | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_vl` | RMSNorm, LayerNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
+ | Qwen2-VL, & QVQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_vl` | RMSNorm, LayerNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
  | Phi3 & Phi3.5 | `liger_kernel.transformers.apply_liger_kernel_to_phi3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
+ | Granite 3.0 & 3.1 | `liger_kernel.transformers.apply_liger_kernel_to_granite` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss |
+ | OLMo2 | `liger_kernel.transformers.apply_liger_kernel_to_olmo2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |


  ## Low-level APIs
@@ -321,6 +345,7 @@ loss.backward()
  | Fused Linear DPO Loss | `liger_kernel.chunked_loss.LigerFusedLinearDPOLoss` |
  | Fused Linear ORPO Loss | `liger_kernel.chunked_loss.LigerFusedLinearORPOLoss` |
  | Fused Linear SimPO Loss | `liger_kernel.chunked_loss.LigerFusedLinearSimPOLoss` |
+ | Fused Linear KTO Loss | `liger_kernel.chunked_loss.LigerFusedLinearKTOLoss` |

  ### Distillation Kernels

@@ -329,6 +354,7 @@ loss.backward()
  | KLDivergence | `liger_kernel.transformers.LigerKLDIVLoss` |
  | JSD | `liger_kernel.transformers.LigerJSD` |
  | Fused Linear JSD | `liger_kernel.transformers.LigerFusedLinearJSD` |
+ | TVD | `liger_kernel.transformers.LigerTVDLoss` |

  ### Experimental Kernels

@@ -340,16 +366,17 @@ loss.backward()

  ## Contributing, Acknowledgements, and License

- - [Contributing Guidelines](https://github.com/linkedin/Liger-Kernel/blob/main/docs/CONTRIBUTING.md)
- - [Acknowledgements](https://github.com/linkedin/Liger-Kernel/blob/main/docs/Acknowledgement.md)
- - [License Information](https://github.com/linkedin/Liger-Kernel/blob/main/docs/License.md)
+ - [Contributing Guidelines](https://github.com/linkedin/Liger-Kernel/blob/main/docs/contributing.md)
+ - [Acknowledgements](https://github.com/linkedin/Liger-Kernel/blob/main/docs/acknowledgement.md)
+ - [License Information](https://github.com/linkedin/Liger-Kernel/blob/main/docs/license.md)

  ## Sponsorship and Collaboration
-
+
+ - [Glows.ai](https://platform.glows.ai/): Sponsoring NVIDIA GPUs for our open source developers.
  - [AMD](https://www.amd.com/en.html): Providing AMD GPUs for our AMD CI.
  - [Intel](https://www.intel.com/): Providing Intel GPUs for our Intel CI.
  - [Modal](https://modal.com/): Free 3000 credits from GPU MODE IRL for our NVIDIA CI.
- - [EmbeddedLLM](https://embeddedllm.com/): Making Liger Kernel run fast and stable on AMD.
+ - [EmbeddedLLM](https://embeddedllm.com/): Making Liger Kernel run fast and stable on AMD.
  - [HuggingFace](https://huggingface.co/): Integrating Liger Kernel into Hugging Face Transformers and TRL.
  - [Lightning AI](https://lightning.ai/): Integrating Liger Kernel into Lightning Thunder.
  - [Axolotl](https://axolotl.ai/): Integrating Liger Kernel into Axolotl.
@@ -359,7 +386,7 @@ loss.backward()

  - For issues, create a Github ticket in this repository
  - For open discussion, join [our discord channel](https://discord.gg/gpumode)
- - For formal collaboration, send an email to byhsu@linkedin.com
+ - For formal collaboration, send an email to yannchen@linkedin.com

  ## Cite this work

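For reference, the one-line patching APIs listed in the README table above are used along these lines; a hedged sketch where the Qwen2 checkpoint name is a placeholder:

```python
import transformers

from liger_kernel.transformers import apply_liger_kernel_to_qwen2

apply_liger_kernel_to_qwen2()  # patch RoPE/RMSNorm/SwiGLU/CrossEntropy in-place
model = transformers.AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-7B-Instruct")
```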
liger_kernel-0.5.4.dist-info/RECORD ADDED
@@ -0,0 +1,74 @@
+ liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ liger_kernel/env_report.py,sha256=uhdEC8OydxoZlb7B6YYcAaBF3crGFdIck-4cxaW4NJY,1728
+ liger_kernel/utils.py,sha256=FtVUkCGBT1UNasTl6HMNycWwiwHayK6tx-ZDdA-sNX4,1884
+ liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EBU1LpWU,2248
+ liger_kernel/chunked_loss/__init__.py,sha256=ATu-xX5Fc49Cr6yBOGBRNTo593ZrU5ZCsIuvoIbJWw4,603
+ liger_kernel/chunked_loss/cpo_loss.py,sha256=OdBR8WYdHTKpLI_c9DcuwqKSWPeAAeTyREz46Vu_cAY,3682
+ liger_kernel/chunked_loss/dpo_loss.py,sha256=wgjnwzLfrMUwV5mXgrq6G1YfQKWnbiFJegaP48BGJHY,4509
+ liger_kernel/chunked_loss/functional.py,sha256=THWWpCnRVhTVfnPnyvQjdBvo1JDtxhwLmtZE_yiBBqM,817
+ liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=5V8rdva89WyHVbmJ8JOmC4DYNOR6ByXfx3qlUieOZkI,11002
+ liger_kernel/chunked_loss/fused_linear_preference.py,sha256=idK9V9NivoVITqVpiG0fEGUHSvinYWkn9-EYXZjR-KQ,18356
+ liger_kernel/chunked_loss/fused_linear_rlhf.py,sha256=sAApL4GQ3YL2F-ymIAF61GCpFfBgFcWF5LB4Gzd7LgY,8044
+ liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=ZqYlXXhIphkJPxOS7iI70avgrr6x0skEtgpckZTYau0,9819
+ liger_kernel/chunked_loss/grpo_loss.py,sha256=M5qlQR-v5Rh8N3P3dPGNhOKygDFJ4516_rJaVPzU_-c,4980
+ liger_kernel/chunked_loss/jsd_loss.py,sha256=yRCQdvd3ruTWP4A_BfU8VcZ6LepSUfO0Ob7stGnueQY,6052
+ liger_kernel/chunked_loss/kto_loss.py,sha256=b3ffJyk97e-6XdXd4HFrYyx8wW4A-CU4gOaJSimKLtA,5476
+ liger_kernel/chunked_loss/orpo_loss.py,sha256=yjcrrbVeemLYodoSKT-FMSnaPtyKAZ3aOrvPD6tTY6Y,3617
+ liger_kernel/chunked_loss/simpo_loss.py,sha256=3TTc7U79Orjgi-Wu81WZkWk5MgsdqKXIOBHgIvDazPw,3865
+ liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ liger_kernel/ops/cross_entropy.py,sha256=D6vFFloiuxFXoWfjlIjmfO3tVaWOiYmztw9FKAi5vdU,18608
+ liger_kernel/ops/fused_linear_cross_entropy.py,sha256=1Y3Uk_TCSjqKgoG2eot1ptnWXJXXQESqGvOmqAW1gsM,10912
+ liger_kernel/ops/fused_linear_jsd.py,sha256=Seshez2qaM6HiTQ8_HEqSwhaeVruNT1SvIM4ZrAPBEU,9602
+ liger_kernel/ops/geglu.py,sha256=axGvCIvlBzuluoAIrWTsp2iZM4BFKNInkPov8YVvH9E,4126
+ liger_kernel/ops/group_norm.py,sha256=qD4D4lSjSgVtO52EBNLC2iTseALRgPgqXE50U2woggk,10837
+ liger_kernel/ops/jsd.py,sha256=0jNeRxpcNI5ckxCdoCNyO5GEedLIuzx3lz6KAiksc4o,6109
+ liger_kernel/ops/kl_div.py,sha256=MnfuYqqQESON1X2Swy064x1urKtMFdgeSWd60VttBXI,8420
+ liger_kernel/ops/layer_norm.py,sha256=6roQjioyg-9O2qLPV8nL4U0-5UH80tdzOMTWwjvDnn8,7961
+ liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
+ liger_kernel/ops/rms_norm.py,sha256=PWLJcdIKU5e-8BuYFHd9Cqlq6wmr6fUXKi9zQD4LetU,11727
+ liger_kernel/ops/rope.py,sha256=ofmBOkUpZZO-Q8Z5B_LOFYYLD-YT-8WnJ4vGOrDYouI,8943
+ liger_kernel/ops/swiglu.py,sha256=KmgMjaJQnbLLgZn2nEpbwHU_xpnYRweCyrLQSVvM1vA,3015
+ liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
+ liger_kernel/ops/utils.py,sha256=uoFKQqo-34N2TWQNvXMFywqGiOMMXNEVBxVojzlUAa0,3836
+ liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
+ liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
+ liger_kernel/transformers/__init__.py,sha256=6v_VcV1GQ9ISgNCd-ZxtmEg_s5GTBQ9F-s1KrFkYzPQ,2265
+ liger_kernel/transformers/auto_model.py,sha256=0qCTRZt280Bj_LcFdzo9hlaR-BWNazawXOGgoCZjgEg,1545
+ liger_kernel/transformers/cross_entropy.py,sha256=z3KTWQnFxr_IZaVjtYt0ZNEWQdDdYThN35xWkHlDGH0,1683
+ liger_kernel/transformers/functional.py,sha256=ShLD3eb--XKNtllznCrOYTbo4f-1KVwzi0KLMICdrn4,4942
+ liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=09Rt7FZzLH42VOcIbQ4dlQd0o3Rlb4vk6fqiOQ7WTD8,1778
+ liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
+ liger_kernel/transformers/geglu.py,sha256=mrgqzIUVd6lN7fkDKLkw5YaESDxDtFgbot430WwPVOQ,1107
+ liger_kernel/transformers/group_norm.py,sha256=6qMAWOprr4SzP0YhNVNGQIBpM5aUHplUD2VuGJrMBz0,2173
+ liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
+ liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
+ liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
+ liger_kernel/transformers/monkey_patch.py,sha256=g3i3q5McBg23A3Mnviw-Eb32le1hvN7jByzONa9ngcs,44000
+ liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
+ liger_kernel/transformers/rms_norm.py,sha256=GqCEJuGt0YdqqlMcToE0Wp4A8YFquDa4UUSyH2uFW2A,1191
+ liger_kernel/transformers/rope.py,sha256=ZTrTORSAyfcFIKjk6XEeYmk4ROH7xXED9L4g2NFntlE,999
+ liger_kernel/transformers/swiglu.py,sha256=i9WTqcNRqReU4XJs391IPbl-I5X0wG4T72D4pqGFfJg,2422
+ liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
+ liger_kernel/transformers/tvd.py,sha256=XrRfyJIqN6HFxXk8MYyFVZM1OLz3mtSbRZvWfZ_JerQ,450
+ liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1wcRgDSyjBMv5i1a7BrDPDQw,881
+ liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ liger_kernel/transformers/model/gemma.py,sha256=ky89b3aWPaeTGRMC-745KgixtQIRXzNAiCORAMLn9yo,9654
+ liger_kernel/transformers/model/gemma2.py,sha256=27NcoZjEqP9Lqb4Wf0EKqTbr2HTGiHPhrVyPCRGPz6s,10767
+ liger_kernel/transformers/model/llama.py,sha256=3LJFXKFDKvEakaWPc_NicSFst4Y_hdSMrdl1UDK1EcA,10330
+ liger_kernel/transformers/model/mistral.py,sha256=MVRksI5_j_8WJu8znOHKCdSI5jSu-S7cdFYzt9m_vIQ,5180
+ liger_kernel/transformers/model/mixtral.py,sha256=jpZJkpl625Q-JHWarj2MqT5mRaSsiCtg0c9vVyvOdCY,11430
+ liger_kernel/transformers/model/mllama.py,sha256=qWexBdskuN3gPJvPUwt4J0nU675tGD6W7wxgRZ9Bifg,11145
+ liger_kernel/transformers/model/olmo2.py,sha256=yyksS6E4fuWd8asEW8rEDBKqZpFmP4ITCM_bjIDZaoY,5124
+ liger_kernel/transformers/model/phi3.py,sha256=biRa8fph9qdnQmkD9I21t5XIjpIt1i6UKU4uk8Up8pU,10292
+ liger_kernel/transformers/model/qwen2.py,sha256=14UuPjxB-tjqWn85Tn4fqBFvVhVsth5iPEt8kJSMiew,9581
+ liger_kernel/transformers/model/qwen2_vl.py,sha256=yMLqsfSYcvhClUpTUjGoADiOxfLB2B8240VdrPP0c8s,9851
+ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
+ liger_kernel/transformers/trainer/orpo_trainer.py,sha256=pdekW7l6Qg_aqa5SYKYlSWUF8m3lkOFvFLcIMEHrz9s,8338
+ liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
+ liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
+ liger_kernel-0.5.4.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
+ liger_kernel-0.5.4.dist-info/METADATA,sha256=Zw7n3Ey6vUed4E54H9-TzKmhuOpd9P2ZFMVL-zYUnew,22255
+ liger_kernel-0.5.4.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
+ liger_kernel-0.5.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ liger_kernel-0.5.4.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
+ liger_kernel-0.5.4.dist-info/RECORD,,
{liger_kernel-0.5.2.dist-info → liger_kernel-0.5.4.dist-info}/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.6.0)
+ Generator: setuptools (75.8.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

liger_kernel-0.5.2.dist-info/RECORD REMOVED
@@ -1,65 +0,0 @@
- liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- liger_kernel/env_report.py,sha256=FViyPju795lB6z4k2TZldvBSmQdcS0A2hcnDxepJrDo,1822
- liger_kernel/utils.py,sha256=HJa-xVKOohDn6pLVIx-Fv0V9h0QAL3qZGQNRICI-OpI,249
- liger_kernel/chunked_loss/__init__.py,sha256=R2wCcz4Y0kTAve926DH3k182XKezpXeACMHj05g9Mm8,346
- liger_kernel/chunked_loss/cpo_loss.py,sha256=Qu1Ul2A12sp6CqIT-atPbHWFb_LLtINEA9mOpIRx_0g,3097
- liger_kernel/chunked_loss/dpo_loss.py,sha256=H9_RRhclckHYM2sd75tgbnf8IxC_PU2JCALbgtPQvwc,4222
- liger_kernel/chunked_loss/functional.py,sha256=9Gr-YXIuEzEJkBUhDx3G2fuQayckLor7cC7svhmPML4,549
- liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=2BH6DCPjsR2zS6zcwFPcIIZRhLF8SohjGdKsAJ_301o,10222
- liger_kernel/chunked_loss/fused_linear_preference.py,sha256=vlWfaaIECWvCQhY9PM7zRI0vKThIrydMf6P44bXn1EE,15114
- liger_kernel/chunked_loss/orpo_loss.py,sha256=ZuKGjbkIYzV4UzvupNdq6vyxCp7-BztQkUt8ZnFvKos,3531
- liger_kernel/chunked_loss/simpo_loss.py,sha256=Wa4LOlDG9PbJkOOkKg8hbKvnKgg7OTBz6-qIkwPK1yw,3275
- liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- liger_kernel/ops/cross_entropy.py,sha256=oG5hfrlmnlF5lOoZRhHRglObxgH4B0KadjWMJj9EWPM,15860
- liger_kernel/ops/fused_linear_cross_entropy.py,sha256=Tnw4gyAYVVdnCOqhOuLEzbUQ3goOTnoAfk3pqSIM5ac,9301
- liger_kernel/ops/fused_linear_jsd.py,sha256=nOv4zwfxHqqepKEmMsQuz-B3H-gRjyo8uClpmqSGLYA,9693
- liger_kernel/ops/geglu.py,sha256=MQL4zyzneZqZYUGPvb1QjI_EYT9_pKfSDgR25WD9jrI,4127
- liger_kernel/ops/group_norm.py,sha256=VaRErVJGR4JqgXXvuIjNGTn3E2egjLtU1y3ymwIf4d8,10961
- liger_kernel/ops/jsd.py,sha256=Ap2b0_geCl6fqBXLI1IS6Yn6GlO-8LgPmnOW3y47dus,6151
- liger_kernel/ops/kl_div.py,sha256=03FNXfvCb6M-56hhFepAFV9p6brArPR6KOKkdGD34mw,8374
- liger_kernel/ops/layer_norm.py,sha256=_CZggw3GNEIUx5weDzadFit5I-Lzosoo8prgeJzcViY,7589
- liger_kernel/ops/qwen2vl_mrope.py,sha256=GvP4Cg-2ClYyiqbe7bB_OMvnlZooBmqP2-9V8RMPde4,8598
- liger_kernel/ops/rms_norm.py,sha256=g7OXwuYI8-LXudDwvXuiupVjjOsbu8c4wwv83VaHa54,11750
- liger_kernel/ops/rope.py,sha256=jrzaA9-6Orn44y_IIam9_YNPQxOFK2FrIRNfFea4EtU,8513
- liger_kernel/ops/swiglu.py,sha256=Fwxtd76rhHKT9ShQAGca9RsnASplAVxtYKHmiT73_yA,2994
- liger_kernel/ops/utils.py,sha256=_VQvd1PX5JXm5xaiBrk2gANp3qr4kM7qYG3ypkBwkMs,3850
- liger_kernel/ops/experimental/embedding.py,sha256=LYR66dB-jhvhtUjeV4PnNro-n77J1mdlmpSLSxB3Y6U,4186
- liger_kernel/ops/experimental/mm_int8int2.py,sha256=JpGVZCgRC6T8XMUJ_QbZRS2XU1bh0urIZphs5DTc1mY,13358
- liger_kernel/transformers/__init__.py,sha256=gia-eBxr7TLxU0GdDf8AfCY4WgDlFLqIGSt7EoQGsBA,1336
- liger_kernel/transformers/auto_model.py,sha256=RMIwQHSiXoksXFTIqFZ4PLBgoqkxJJAT3q1Qh47bGN8,1552
- liger_kernel/transformers/cross_entropy.py,sha256=yEm_YQ7oa3_BzT3hdW6KrAslduhSqWcJQVNZZDcWCg4,1758
- liger_kernel/transformers/functional.py,sha256=sUBoU8Vb4pLpr9G6IdkRsToYgh-rCXL4OLYat7Tv_GU,4450
- liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=_i0PXSp5iZ9pKXdEeZ4lvHCENJYjV4y74yz3ZRG5XQg,1484
- liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
- liger_kernel/transformers/geglu.py,sha256=QcrME_8ooIn0xa59LaC0aoOdRrBIFd11Y0bAyF0NfCw,1130
- liger_kernel/transformers/group_norm.py,sha256=FJ9R7mS9G1wO-GRIQ6QKSmIhnZ6nQ6GIkE4NnX_hnn0,2241
- liger_kernel/transformers/jsd.py,sha256=sbr8DnKSYZJH9pv2rpmboNijYGpZKbhb2-WSGp5_v6g,3001
- liger_kernel/transformers/kl_div.py,sha256=qVhjBg6tjRyue5iZ3NFxo8uySY4JuIFJyv0IM_50F24,431
- liger_kernel/transformers/layer_norm.py,sha256=fd6o4kSHJWolQMWxh-l1qObfgL08ruNbUoBiANKX1ow,972
- liger_kernel/transformers/monkey_patch.py,sha256=Fk2v4GZQDJzfh3Cpc6BHNJbs_tungDyWmqS9nuG9Lc4,38406
- liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
- liger_kernel/transformers/rms_norm.py,sha256=AHstklNIO1PLHjjCBU-TPuUD-Fl_pycJUTLlJNojbV8,1189
- liger_kernel/transformers/rope.py,sha256=m-ah8vZBYW8tfplTXCiAPMHJWlB1tdp_JPXJeWE-Boo,943
- liger_kernel/transformers/swiglu.py,sha256=0-tVJ8xEYfhxnduc16PflXFj8sZPxdx9sHUn3hfwCI4,2468
- liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
- liger_kernel/transformers/experimental/embedding.py,sha256=HpckiAMKM8-SRxKDcGTqortVxnjhwpZsfsp9lfjqfeM,895
- liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- liger_kernel/transformers/model/gemma.py,sha256=R4huxuR48gkLrdT8KqV7As2v9dZtEmcGVz6YG1ZmuJE,9692
- liger_kernel/transformers/model/gemma2.py,sha256=zxQsxCRqkoxCES3GJPVI7soUuF3J5HZDlvJgaBos1zM,10836
- liger_kernel/transformers/model/llama.py,sha256=RinsgC_eR-YNvZd2SHPQxZ4eyR3uViaTFCM3SvI5nks,10426
- liger_kernel/transformers/model/mistral.py,sha256=XpL1rlWg_llvW3z_Hf_d8WQs7uQaH4ds7EZ2SxjQHsU,5144
- liger_kernel/transformers/model/mixtral.py,sha256=nyDS1dBpsOXYC2DuW59Hgu7ZrGftrHuWPfNqjcNPIxs,11503
- liger_kernel/transformers/model/mllama.py,sha256=mesNCgj0Ea1O-fqRD4LVxDJ1CR2abY_zAzK_bfVzkiU,11222
- liger_kernel/transformers/model/phi3.py,sha256=xUZPlaPKwknLjHc3uUW3EPodm1h0vD3G7Qnhh51v-Io,10332
- liger_kernel/transformers/model/qwen2.py,sha256=EyhSSzQOskGjSnCsKMZpd1s5IAIlHd5PBO3q0MoCs00,9619
- liger_kernel/transformers/model/qwen2_vl.py,sha256=bIQe2bWiY--G84FhCD29Gdi64_qHP6vbcGsK6vKysQE,8547
- liger_kernel/transformers/trainer/__init__.py,sha256=c4OQVJmhNOloj0JYSEc0j_cQuBbzGWILfaowUR1hmRw,210
- liger_kernel/transformers/trainer/orpo_trainer.py,sha256=jko6oq_XQdBSmXubp05E-_YXOyhtB5Bj75dg5YNwOsE,7517
- liger_kernel/triton/__init__.py,sha256=yfRe0zMb47QnqjecZWG7LnanfCTzeku7SgWRAwNVmzU,101
- liger_kernel/triton/monkey_patch.py,sha256=5BcGKTtdqeYchypBIBopGIWPx1-cFALz7sOKoEsqXJ0,1584
- liger_kernel-0.5.2.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
- liger_kernel-0.5.2.dist-info/METADATA,sha256=olSIT-Jd2Mowu2ja4QLwyPYBhCnY22znBq9pV7stkKI,20695
- liger_kernel-0.5.2.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
- liger_kernel-0.5.2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
- liger_kernel-0.5.2.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
- liger_kernel-0.5.2.dist-info/RECORD,,