liger-kernel-nightly 0.5.10.dev20250624183504__py3-none-any.whl → 0.6.3.dev20251121010306__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of liger-kernel-nightly might be problematic. Click here for more details.

Files changed (68)
  1. liger_kernel/chunked_loss/__init__.py +1 -0
  2. liger_kernel/chunked_loss/cosine_similarity_loss.py +136 -0
  3. liger_kernel/chunked_loss/dpo_loss.py +54 -3
  4. liger_kernel/chunked_loss/functional.py +2 -0
  5. liger_kernel/chunked_loss/fused_linear_distillation.py +13 -2
  6. liger_kernel/chunked_loss/fused_linear_ppo.py +4 -0
  7. liger_kernel/chunked_loss/grpo_loss.py +38 -4
  8. liger_kernel/chunked_loss/jsd_loss.py +23 -7
  9. liger_kernel/ops/cross_entropy.py +118 -62
  10. liger_kernel/ops/fused_add_rms_norm.py +412 -0
  11. liger_kernel/ops/fused_linear_cross_entropy.py +113 -21
  12. liger_kernel/ops/geglu.py +1 -1
  13. liger_kernel/ops/layer_norm.py +124 -89
  14. liger_kernel/ops/llama4_rope.py +225 -0
  15. liger_kernel/ops/poly_norm.py +386 -0
  16. liger_kernel/ops/rms_norm.py +2 -2
  17. liger_kernel/ops/rope.py +1 -1
  18. liger_kernel/ops/swiglu.py +1 -1
  19. liger_kernel/ops/tiled_mlp.py +136 -0
  20. liger_kernel/transformers/__init__.py +50 -0
  21. liger_kernel/transformers/cross_entropy.py +8 -3
  22. liger_kernel/transformers/experimental/__init__.py +5 -0
  23. liger_kernel/transformers/functional.py +38 -6
  24. liger_kernel/transformers/fused_add_rms_norm.py +39 -0
  25. liger_kernel/transformers/fused_linear_cross_entropy.py +16 -4
  26. liger_kernel/transformers/llama4_rope.py +93 -0
  27. liger_kernel/transformers/model/falcon_h1.py +122 -0
  28. liger_kernel/transformers/model/gemma.py +28 -8
  29. liger_kernel/transformers/model/gemma2.py +31 -8
  30. liger_kernel/transformers/model/gemma3.py +100 -110
  31. liger_kernel/transformers/model/glm4.py +18 -5
  32. liger_kernel/transformers/model/glm4v.py +163 -0
  33. liger_kernel/transformers/model/glm4v_moe.py +172 -0
  34. liger_kernel/transformers/model/internvl.py +157 -0
  35. liger_kernel/transformers/model/llama.py +26 -7
  36. liger_kernel/transformers/model/llama4.py +121 -0
  37. liger_kernel/transformers/model/llava.py +18 -6
  38. liger_kernel/transformers/model/loss_utils.py +34 -3
  39. liger_kernel/transformers/model/mistral.py +17 -10
  40. liger_kernel/transformers/model/mixtral.py +24 -9
  41. liger_kernel/transformers/model/mllama.py +18 -7
  42. liger_kernel/transformers/model/olmo2.py +18 -5
  43. liger_kernel/transformers/model/output_classes.py +147 -0
  44. liger_kernel/transformers/model/paligemma.py +41 -5
  45. liger_kernel/transformers/model/phi3.py +24 -159
  46. liger_kernel/transformers/model/qwen2.py +26 -4
  47. liger_kernel/transformers/model/qwen2_5_vl.py +21 -8
  48. liger_kernel/transformers/model/qwen2_vl.py +24 -7
  49. liger_kernel/transformers/model/qwen3.py +22 -6
  50. liger_kernel/transformers/model/qwen3_moe.py +27 -7
  51. liger_kernel/transformers/model/qwen3_next.py +146 -0
  52. liger_kernel/transformers/model/qwen3_vl.py +150 -0
  53. liger_kernel/transformers/model/qwen3_vl_moe.py +126 -0
  54. liger_kernel/transformers/model/smollm3.py +199 -0
  55. liger_kernel/transformers/model/smolvlm.py +158 -0
  56. liger_kernel/transformers/monkey_patch.py +1090 -116
  57. liger_kernel/transformers/multi_token_attention.py +1 -1
  58. liger_kernel/transformers/poly_norm.py +42 -0
  59. liger_kernel/transformers/rms_norm.py +7 -0
  60. liger_kernel/transformers/rope.py +43 -0
  61. liger_kernel/transformers/tiled_mlp.py +133 -0
  62. {liger_kernel_nightly-0.5.10.dev20250624183504.dist-info → liger_kernel_nightly-0.6.3.dev20251121010306.dist-info}/METADATA +26 -24
  63. liger_kernel_nightly-0.6.3.dev20251121010306.dist-info/RECORD +116 -0
  64. liger_kernel_nightly-0.5.10.dev20250624183504.dist-info/RECORD +0 -95
  65. {liger_kernel_nightly-0.5.10.dev20250624183504.dist-info → liger_kernel_nightly-0.6.3.dev20251121010306.dist-info}/LICENSE +0 -0
  66. {liger_kernel_nightly-0.5.10.dev20250624183504.dist-info → liger_kernel_nightly-0.6.3.dev20251121010306.dist-info}/NOTICE +0 -0
  67. {liger_kernel_nightly-0.5.10.dev20250624183504.dist-info → liger_kernel_nightly-0.6.3.dev20251121010306.dist-info}/WHEEL +0 -0
  68. {liger_kernel_nightly-0.5.10.dev20250624183504.dist-info → liger_kernel_nightly-0.6.3.dev20251121010306.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,7 @@ from liger_kernel.ops.multi_token_attention import LigerMultiTokenAttentionFunct
9
9
 
10
10
 
11
11
  class LigerMultiTokenAttention(nn.Module):
12
- """
12
+ r"""
13
13
  Multi-Token Attention:
14
14
  out = mask_{0}(conv2d(softmax(mask_{-\inf}(scores))))
15
15
 
@@ -0,0 +1,42 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ from liger_kernel.ops.poly_norm import LigerPolyNormFunction
5
+
6
+
7
+ class LigerPolyNorm(nn.Module):
8
+ """
9
+ PolyNorm layer wrapper for Liger kernel.
10
+
11
+ PolyNorm formula:
12
+ y = w₀·norm(x³) + w₁·norm(x²) + w₂·norm(x) + b
13
+ where norm(u) = u / sqrt(mean(u²) + ε)
14
+
15
+ Reference:
16
+ https://github.com/BryceZhuo/PolyCom/
17
+
18
+ Args:
19
+ eps: epsilon for numerical stability (default: 1e-6)
20
+ in_place: whether to in-place modify grad_output in backward to save memory (default: False).
21
+ Set to True to save memory if grad_output is not needed elsewhere.
22
+ """
23
+
24
+ def __init__(self, eps=1e-6, in_place=True):
25
+ super().__init__()
26
+ # Align with PolyCom reference: initialize weights to (1/3, 1/3, 1/3) and bias to 1.0
27
+ self.weight = nn.Parameter(torch.full((3,), 1.0 / 3.0))
28
+ self.bias = nn.Parameter(torch.tensor(1.0))
29
+ self.variance_epsilon = eps
30
+ self.in_place = in_place
31
+
32
+ def forward(self, hidden_states):
33
+ return LigerPolyNormFunction.apply(
34
+ hidden_states,
35
+ self.weight,
36
+ self.bias,
37
+ self.variance_epsilon,
38
+ self.in_place,
39
+ )
40
+
41
+ def extra_repr(self):
42
+ return f"weight_shape={tuple(self.weight.shape)}, eps={self.variance_epsilon}, in_place={self.in_place}"
@@ -77,3 +77,10 @@ class LigerRMSNormForGlm4(LigerRMSNorm):
77
77
  self, hidden_size, eps=1e-6, offset=0.0, casting_mode="llama", init_fn="ones", in_place=False, row_mode=None
78
78
  ):
79
79
  super().__init__(hidden_size, eps, offset, casting_mode, init_fn, in_place, row_mode)
80
+
81
+
82
+ class LigerRMSNormForQwen3Next(LigerRMSNorm):
83
+ def __init__(
84
+ self, hidden_size, eps=1e-6, offset=1.0, casting_mode="gemma", init_fn="zeros", in_place=False, row_mode=None
85
+ ):
86
+ super().__init__(hidden_size, eps, offset, casting_mode, init_fn, in_place, row_mode)
@@ -1,3 +1,8 @@
1
+ from typing import Optional
2
+ from typing import Tuple
3
+
4
+ import torch
5
+
1
6
  from liger_kernel.ops.rope import LigerRopeFunction
2
7
 
3
8
 
@@ -18,3 +23,41 @@ def liger_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
18
23
  """
19
24
 
20
25
  return LigerRopeFunction.apply(q, k, cos, sin, position_ids, unsqueeze_dim)
26
+
27
+
28
+ def liger_rotary_pos_emb_with_cast(
29
+ q: torch.Tensor,
30
+ k: torch.Tensor,
31
+ cos: torch.Tensor,
32
+ sin: torch.Tensor,
33
+ position_ids: Optional[torch.Tensor] = None,
34
+ unsqueeze_dim: int = 1,
35
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
36
+ orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
37
+
38
+ q32 = q.to(torch.float32)
39
+ k32 = k.to(torch.float32)
40
+ cos32 = cos.to(torch.float32)
41
+ sin32 = sin.to(torch.float32)
42
+
43
+ q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim)
44
+ return q_out.to(orig_q_dtype), k_out.to(orig_k_dtype)
45
+
46
+
47
+ def liger_rotary_pos_emb_with_cast_and_leading_batch(
48
+ q: torch.Tensor,
49
+ k: torch.Tensor,
50
+ cos: torch.Tensor,
51
+ sin: torch.Tensor,
52
+ position_ids: Optional[torch.Tensor] = None,
53
+ unsqueeze_dim: int = 1,
54
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
55
+ orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
56
+
57
+ q32 = q.to(torch.float32).unsqueeze(0)
58
+ k32 = k.to(torch.float32).unsqueeze(0)
59
+ cos32 = cos.to(torch.float32).unsqueeze(0)
60
+ sin32 = sin.to(torch.float32).unsqueeze(0)
61
+
62
+ q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim)
63
+ return q_out.to(orig_q_dtype).squeeze(0), k_out.to(orig_k_dtype).squeeze(0)
@@ -0,0 +1,133 @@
1
+ from typing import Optional
2
+
3
+ import torch.nn as nn
4
+
5
+ from liger_kernel.ops.geglu import LigerGELUMulFunction
6
+ from liger_kernel.ops.swiglu import LigerSiLUMulFunction
7
+ from liger_kernel.ops.tiled_mlp import apply_tiled_mlp
8
+
9
+
10
+ class LigerTiledGEGLUMLP(nn.Module):
11
+ """
12
+ Memory-efficient GEGLU MLP using tiled computation.
13
+
14
+ This module combines GEGLU activation with tiled processing to handle
15
+ very long sequences efficiently. The forward pass is recomputed during
16
+ backward to save memory.
17
+
18
+ Args:
19
+ config: Model configuration with hidden_size and intermediate_size attributes
20
+ num_shards: Number of shards to split the sequence. If None, automatically
21
+ calculated as ceil(seqlen / hidden_size)
22
+ """
23
+
24
+ def __init__(self, config, num_shards: Optional[int] = None):
25
+ super().__init__()
26
+ self.config = config
27
+ self.hidden_size = config.hidden_size
28
+ self.intermediate_size = config.intermediate_size
29
+ self.num_shards = num_shards
30
+
31
+ self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
32
+ self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
33
+ self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)
34
+
35
+ # Validate activation function
36
+ if hasattr(config, "hidden_act") and config.hidden_act not in [
37
+ "gelu",
38
+ "gelu_new",
39
+ "gelu_pytorch_tanh",
40
+ ]:
41
+ raise ValueError(f"LigerTiledGEGLUMLP requires GELU activation, got {config.hidden_act}")
42
+
43
+ def _mlp_forward(self, module, x):
44
+ """Internal MLP forward function for tiled computation."""
45
+ gate = module.gate_proj(x)
46
+ up = module.up_proj(x)
47
+ return module.down_proj(LigerGELUMulFunction.apply(gate, up))
48
+
49
+ def forward(self, x):
50
+ """
51
+ Forward pass with tiled computation.
52
+
53
+ Args:
54
+ x: Input tensor of shape [batch_size, seq_len, hidden_size]
55
+ or [seq_len, hidden_size]
56
+
57
+ Returns:
58
+ Output tensor of the same shape as input
59
+ """
60
+ compute_params = [
61
+ self.gate_proj.weight,
62
+ self.up_proj.weight,
63
+ self.down_proj.weight,
64
+ ]
65
+
66
+ return apply_tiled_mlp(
67
+ fn=self._mlp_forward,
68
+ mlp_module=self,
69
+ x=x,
70
+ num_shards=self.num_shards,
71
+ compute_params=compute_params,
72
+ )
73
+
74
+
75
+ class LigerTiledSwiGLUMLP(nn.Module):
76
+ """
77
+ Memory-efficient SwiGLU MLP using tiled computation.
78
+
79
+ This module combines SwiGLU activation with tiled processing to handle
80
+ very long sequences efficiently. The forward pass is recomputed during
81
+ backward to save memory.
82
+
83
+ Args:
84
+ config: Model configuration with hidden_size and intermediate_size attributes
85
+ num_shards: Number of shards to split the sequence. If None, automatically
86
+ calculated as ceil(seqlen / hidden_size)
87
+ """
88
+
89
+ def __init__(self, config, num_shards: Optional[int] = None):
90
+ super().__init__()
91
+ self.config = config
92
+ self.hidden_size = config.hidden_size
93
+ self.intermediate_size = config.intermediate_size
94
+ self.num_shards = num_shards
95
+
96
+ self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
97
+ self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
98
+ self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)
99
+
100
+ # Validate activation function
101
+ if hasattr(config, "hidden_act") and config.hidden_act not in ["silu", "swish"]:
102
+ raise ValueError(f"LigerTiledSwiGLUMLP requires SiLU/Swish activation, got {config.hidden_act}")
103
+
104
+ def _mlp_forward(self, module, x):
105
+ """Internal MLP forward function for tiled computation."""
106
+ gate = module.gate_proj(x)
107
+ up = module.up_proj(x)
108
+ return module.down_proj(LigerSiLUMulFunction.apply(gate, up))
109
+
110
+ def forward(self, x):
111
+ """
112
+ Forward pass with tiled computation.
113
+
114
+ Args:
115
+ x: Input tensor of shape [batch_size, seq_len, hidden_size]
116
+ or [seq_len, hidden_size]
117
+
118
+ Returns:
119
+ Output tensor of the same shape as input
120
+ """
121
+ compute_params = [
122
+ self.gate_proj.weight,
123
+ self.up_proj.weight,
124
+ self.down_proj.weight,
125
+ ]
126
+
127
+ return apply_tiled_mlp(
128
+ fn=self._mlp_forward,
129
+ mlp_module=self,
130
+ x=x,
131
+ num_shards=self.num_shards,
132
+ compute_params=compute_params,
133
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.10.dev20250624183504
3
+ Version: 0.6.3.dev20251121010306
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -33,17 +33,16 @@ License-File: NOTICE
33
33
  Requires-Dist: torch>=2.1.2
34
34
  Requires-Dist: triton>=2.3.1
35
35
  Provides-Extra: dev
36
- Requires-Dist: transformers>=4.44.2; extra == "dev"
36
+ Requires-Dist: transformers>=4.49.0; extra == "dev"
37
37
  Requires-Dist: matplotlib>=3.7.2; extra == "dev"
38
- Requires-Dist: flake8>=4.0.1.1; extra == "dev"
39
- Requires-Dist: black>=24.4.2; extra == "dev"
40
- Requires-Dist: isort>=5.13.2; extra == "dev"
38
+ Requires-Dist: ruff>=0.12.0; extra == "dev"
41
39
  Requires-Dist: pytest>=7.1.2; extra == "dev"
42
40
  Requires-Dist: pytest-xdist; extra == "dev"
41
+ Requires-Dist: pytest-cov; extra == "dev"
42
+ Requires-Dist: pytest-asyncio; extra == "dev"
43
43
  Requires-Dist: pytest-rerunfailures; extra == "dev"
44
44
  Requires-Dist: datasets>=2.19.2; extra == "dev"
45
45
  Requires-Dist: seaborn; extra == "dev"
46
- Requires-Dist: mkdocs; extra == "dev"
47
46
  Requires-Dist: mkdocs-material; extra == "dev"
48
47
  Requires-Dist: torchvision>=0.20; extra == "dev"
49
48
 
@@ -81,7 +80,7 @@ Requires-Dist: torchvision>=0.20; extra == "dev"
81
80
  </td>
82
81
  <td style="padding: 10px;">
83
82
  <a href="https://discord.gg/gpumode">
84
- <img src="https://dcbadge.vercel.app/api/server/gpumode?style=flat" alt="Join Our Discord">
83
+ <img src="https://dcbadge.limes.pink/api/server/gpumode?style=flat" alt="Join Our Discord">
85
84
  </a>
86
85
  </td>
87
86
  </tr>
@@ -178,8 +177,8 @@ y = orpo_loss(lm_head.weight, x, target)
178
177
  - `triton >= 3.0.0` Install from pypi. (e.g. `pip install triton==3.0.0`)
179
178
 
180
179
  ```bash
181
- # Need to pass the url when installing
182
- pip install -e .[dev] --extra-index-url https://download.pytorch.org/whl/nightly/rocm6.2
180
+ pip install -e .[dev]
181
+ pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.3/
183
182
  ```
184
183
 
185
184
  ### Optional Dependencies
@@ -213,6 +212,9 @@ pip install -e .
213
212
 
214
213
  # Setup Development Dependencies
215
214
  pip install -e ".[dev]"
215
+
216
+ # NOTE -> For AMD users only
217
+ pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm6.3/
216
218
  ```
217
219
 
218
220
 
@@ -290,6 +292,7 @@ loss.backward()
290
292
 
291
293
  | **Model** | **API** | **Supported Operations** |
292
294
  |-------------|--------------------------------------------------------------|-------------------------------------------------------------------------|
295
+ | Llama4 (Text) & (Multimodal) | `liger_kernel.transformers.apply_liger_kernel_to_llama4` | RMSNorm, LayerNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
293
296
  | LLaMA 2 & 3 | `liger_kernel.transformers.apply_liger_kernel_to_llama` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
294
297
  | LLaMA 3.2-Vision | `liger_kernel.transformers.apply_liger_kernel_to_mllama` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
295
298
  | Mistral | `liger_kernel.transformers.apply_liger_kernel_to_mistral` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
@@ -303,11 +306,12 @@ loss.backward()
303
306
  | Qwen2-VL, & QVQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_vl` | RMSNorm, LayerNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
304
307
  | Qwen2.5-VL | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_5_vl` | RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
305
308
  | Qwen3 | `liger_kernel.transformers.apply_liger_kernel_to_qwen3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
306
- | Qwen3 MoE | `liger_kernel_transformers.apply_liger_kernel_to_qwen3_moe` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
309
+ | Qwen3 MoE | `liger_kernel.transformers.apply_liger_kernel_to_qwen3_moe` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
307
310
  | Phi3 & Phi3.5 | `liger_kernel.transformers.apply_liger_kernel_to_phi3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
308
311
  | Granite 3.0 & 3.1 | `liger_kernel.transformers.apply_liger_kernel_to_granite` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss |
309
312
  | OLMo2 | `liger_kernel.transformers.apply_liger_kernel_to_olmo2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
310
313
  | GLM-4 | `liger_kernel.transformers.apply_liger_kernel_to_glm4` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
314
+ | InternVL3 | `liger_kernel.transformers.apply_liger_kernel_to_internvl` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
311
315
 
312
316
 
313
317
  ## Low-level APIs
@@ -387,17 +391,17 @@ loss.backward()
387
391
  <td style="padding: 10px;">
388
392
  <div style="display: block;">
389
393
  <a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml">
390
- <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml/badge.svg?event=schedule" alt="Build">
394
+ <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml/badge.svg?branch=main&event=push" alt="Build">
391
395
  </a>
392
396
  </div>
393
397
  <div style="display: block;">
394
398
  <a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
395
- <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?event=schedule" alt="Build">
399
+ <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?branch=main&event=push" alt="Build">
396
400
  </a>
397
401
  </div>
398
402
  <div style="display: block;">
399
- <a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
400
- <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?event=schedule" alt="Build">
403
+ <a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml">
404
+ <img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?branch=main&event=push" alt="Build">
401
405
  </a>
402
406
  </div>
403
407
  </td>
@@ -410,21 +414,19 @@ loss.backward()
410
414
 
411
415
  - For issues, create a Github ticket in this repository
412
416
  - For open discussion, join [our discord channel on GPUMode](https://discord.com/channels/1189498204333543425/1275130785933951039)
413
- - For formal collaboration, send an email to yannchen@linkedin.com and hning@linkedin.com
417
+ - For formal collaboration, send an email to Yanning Chen(yannchen@linkedin.com) and Zhipeng Wang(zhipwang@linkedin.com)
414
418
 
415
419
  ## Cite this work
416
420
 
417
421
  Biblatex entry:
418
422
  ```bib
419
- @article{hsu2024ligerkernelefficienttriton,
420
- title={Liger Kernel: Efficient Triton Kernels for LLM Training},
421
- author={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen},
422
- year={2024},
423
- eprint={2410.10989},
424
- archivePrefix={arXiv},
425
- primaryClass={cs.LG},
426
- url={https://arxiv.org/abs/2410.10989},
427
- journal={arXiv preprint arXiv:2410.10989},
423
+ @inproceedings{
424
+ hsu2025ligerkernel,
425
+ title={Liger-Kernel: Efficient Triton Kernels for {LLM} Training},
426
+ author={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen and Zhipeng Wang},
427
+ booktitle={Championing Open-source DEvelopment in ML Workshop @ ICML25},
428
+ year={2025},
429
+ url={https://openreview.net/forum?id=36SjAIT42G}
428
430
  }
429
431
  ```
430
432
 
@@ -0,0 +1,116 @@
1
+ liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ liger_kernel/env_report.py,sha256=uhdEC8OydxoZlb7B6YYcAaBF3crGFdIck-4cxaW4NJY,1728
3
+ liger_kernel/utils.py,sha256=BQleeZWHSZPNuPcYcoZTOp1kcNEZONZilPP5-AmjgWI,2024
4
+ liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EBU1LpWU,2248
5
+ liger_kernel/chunked_loss/__init__.py,sha256=J5_jNnzZ4gZmA38W5f_4oab7xMoNk1Xy-yh3X_Xlf-s,714
6
+ liger_kernel/chunked_loss/cosine_similarity_loss.py,sha256=x2nprTHPraU8Ya2NMZtaDk9r-s-1NKJwCTrzQIdmg-8,4680
7
+ liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
8
+ liger_kernel/chunked_loss/dpo_loss.py,sha256=I83khNs3QQjuhr8U3NIOAACkbse6DNiBV-TulPZ0lXw,9006
9
+ liger_kernel/chunked_loss/functional.py,sha256=-XPDbLml9dHmvoSU2VNTUrBDFehuzvuAGPikVetBMtI,1132
10
+ liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=yRtolfFGfKB-SxGQQyF68GYXd11Zlvh1InLdGeWNFIE,12652
11
+ liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=ZjpNP5VC-tXXIKb4AckkQ3iWWQeej-JoG4StJq3N0wg,13650
12
+ liger_kernel/chunked_loss/fused_linear_preference.py,sha256=FIH85uUXAOgYx5Ax8MjFhJHVu-2pKtY7wSegd0zSyyY,18336
13
+ liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=RiuK3UtRwH9T6jZ36sA8Urj-TVuOLOO2syLg_JOQapY,13437
14
+ liger_kernel/chunked_loss/grpo_loss.py,sha256=SkZuKoW8K94UbWR-OtfopsQkuQ8tFOr_90AGR6_Mhes,12844
15
+ liger_kernel/chunked_loss/jsd_loss.py,sha256=G0RghPYYelyZ6DOEiwS8we9TT5MY2iHpiFqzZ2Xy87g,8038
16
+ liger_kernel/chunked_loss/kto_loss.py,sha256=llVCe6DkcpCo57seGWoMikaQVFApx764jsmSbQyqwQY,7529
17
+ liger_kernel/chunked_loss/orpo_loss.py,sha256=nu9UYG16dcMw93lvHi4_hYs3Q0FK1KnlmMRj7OpYU8s,4872
18
+ liger_kernel/chunked_loss/simpo_loss.py,sha256=fy2w8KbhMrBv7b1jdIeH3bBFxY52bPQPZb3KwBvmurM,5385
19
+ liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ liger_kernel/ops/cross_entropy.py,sha256=-fd8qVxn_66MGSLs-Gs8yGmWlkET5YAoyb__Bolfz4c,22617
21
+ liger_kernel/ops/dyt.py,sha256=gCLz4S8aul8SY9nvIGaoK67aGb7U9MJRQdo3ONqmQYs,5417
22
+ liger_kernel/ops/fused_add_rms_norm.py,sha256=UBqmlqFCmhSAIpkNKd8rrfXatX7Z4J9bp2dX9A0lrJQ,14017
23
+ liger_kernel/ops/fused_linear_cross_entropy.py,sha256=YepeWqX37gKc1-FUrzkDTzXYdOvmBmfv4KgL__KN_UI,16158
24
+ liger_kernel/ops/fused_linear_jsd.py,sha256=CSoprxb-YcJy-YUKiTcYkxN8sb9h2kdk_iHuncvSV5c,9683
25
+ liger_kernel/ops/fused_neighborhood_attention.py,sha256=vPi5xbnh6wxyZehaqo6Tuilqo2fN5SGDiONjnNmIKqs,35556
26
+ liger_kernel/ops/geglu.py,sha256=r0WSq9E93zzynL44Wh8femzOWK07_SseBM_pJUyxT3s,4144
27
+ liger_kernel/ops/group_norm.py,sha256=qD4D4lSjSgVtO52EBNLC2iTseALRgPgqXE50U2woggk,10837
28
+ liger_kernel/ops/grpo_loss.py,sha256=anRnv7k1-AV3pCC6_TqP0GMg78YYUfRAJrbpx6PVhl0,9448
29
+ liger_kernel/ops/jsd.py,sha256=onHp5T3MbvJaVz5Vup7Ww6EQp_HTaZeayTjJk6FgQMY,7042
30
+ liger_kernel/ops/kl_div.py,sha256=ZjGdDLKWksHT9dZ0xF_TDgAkj5cuMTwwT5tr9E-_24o,8734
31
+ liger_kernel/ops/layer_norm.py,sha256=WmiORsIyufOhazmYZTPjeSc5Z-xTAYwXAKqUcCv_dlY,9807
32
+ liger_kernel/ops/llama4_rope.py,sha256=-aqdZzllklTN8b9--e-TsWY_ntGCN8-tyseT4x0bd8s,8223
33
+ liger_kernel/ops/multi_token_attention.py,sha256=Oz_RXDp-OSS_R_HuGmaETHdAJ7Toda_70OfE7TXMUlY,7645
34
+ liger_kernel/ops/poly_norm.py,sha256=MLgI8Ea93fugKibHCUauQ2ASYVXCvpPZe5v3kQZU6po,11152
35
+ liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
36
+ liger_kernel/ops/rms_norm.py,sha256=DtvsWN5YktFAoc0JYSAwVeoZfryBFJlX-ipU7ooP01A,18891
37
+ liger_kernel/ops/rope.py,sha256=v-7JHRrv-5ImoROkpKfl30WwWI4qTa2tAl7zQeB4ml4,8956
38
+ liger_kernel/ops/softmax.py,sha256=tgORx6MK1IDDtZKqGarj0IPIVjqAIEUXXYPiinhRdtI,5864
39
+ liger_kernel/ops/sparsemax.py,sha256=AeWe1xgkHJFEKWTj2vu_0hj7LztGvjqXAps-QTpCY0U,5087
40
+ liger_kernel/ops/swiglu.py,sha256=D7nd4u_LInwsIRNCDdY77lqnTz8-W5dJrpEAt8zEO_A,3033
41
+ liger_kernel/ops/tiled_mlp.py,sha256=eyMFsFFgHch8a_6R6IYRG24_jqKg5GF_BQUoQuAG8SY,4529
42
+ liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
43
+ liger_kernel/ops/utils.py,sha256=uoFKQqo-34N2TWQNvXMFywqGiOMMXNEVBxVojzlUAa0,3836
44
+ liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
45
+ liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
46
+ liger_kernel/transformers/__init__.py,sha256=XX1ySRgZXeQe0or-6GNclAsNQG_VkABQlkwqpB1Wn8A,10090
47
+ liger_kernel/transformers/auto_model.py,sha256=0qCTRZt280Bj_LcFdzo9hlaR-BWNazawXOGgoCZjgEg,1545
48
+ liger_kernel/transformers/cross_entropy.py,sha256=DMtHkKrVJDSsels7KgGQJqrXkEAd6Zopcdr-5oRmQgE,2010
49
+ liger_kernel/transformers/dyt.py,sha256=i-4GPaMrl-jab9TVI5qN0-H9qycn_mCbV82ozU4nbmU,723
50
+ liger_kernel/transformers/fsdp.py,sha256=CUiyjTmjkjY7pLXQv8ly9rnzgXw6529csd9pvtJNMYc,3096
51
+ liger_kernel/transformers/functional.py,sha256=OqEmsDkaV3YiXaw1zqjDvHcC9_tU5TBrmhCNPOdgHQY,8590
52
+ liger_kernel/transformers/fused_add_rms_norm.py,sha256=7_Bzg-x6lLe6W1qG2DtjDALhEpNZlC6N5GppEs9cTYY,1199
53
+ liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=Hhp9XGgMKZhvlkjHY5Jkl_T7fSyJoCL9m5c3z_9mflQ,2347
54
+ liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
55
+ liger_kernel/transformers/fused_neighborhood_attention.py,sha256=TxYDUAt9B6WSP14aJP66C_2Mbds2sSIPGnamhUSTrC8,7957
56
+ liger_kernel/transformers/geglu.py,sha256=mrgqzIUVd6lN7fkDKLkw5YaESDxDtFgbot430WwPVOQ,1107
57
+ liger_kernel/transformers/group_norm.py,sha256=6qMAWOprr4SzP0YhNVNGQIBpM5aUHplUD2VuGJrMBz0,2173
58
+ liger_kernel/transformers/grpo_loss.py,sha256=uAkUNKSnUGEOqa82L9w2e6AI1kcmG8K45-QxyaT8zhM,3897
59
+ liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
60
+ liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
61
+ liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
62
+ liger_kernel/transformers/llama4_rope.py,sha256=kS6PSHEwf3dS7hD7C7p8S0geugx2EMCiP0h0F7LsUoY,3639
63
+ liger_kernel/transformers/monkey_patch.py,sha256=ZGnLygHuCiKGd6hT-C0pt1aY85f6GNFdV98oCDpxHHo,124742
64
+ liger_kernel/transformers/multi_token_attention.py,sha256=K3NIY9_5TPgZ4_Rahn0xnkMXxD_fmlJHK4CWGYvGQp0,1752
65
+ liger_kernel/transformers/poly_norm.py,sha256=g5tC75i3qy1_N26ZUP-jfpct7ivQAEdJfIfx8IXzeyE,1377
66
+ liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
67
+ liger_kernel/transformers/rms_norm.py,sha256=HwddVqrqS58jE-M2_4NkFGARtCDBhGnkKyjBN9b3FYI,3004
68
+ liger_kernel/transformers/rope.py,sha256=VMlDZI6zss9mLaLcN5XCE_ktmYRwAi_Eh4TIgO6NrIQ,2361
69
+ liger_kernel/transformers/softmax.py,sha256=yadlAgE4V2JByMwrDDa2s5SUBp8Jgd57xwnVvAWoBaI,264
70
+ liger_kernel/transformers/sparsemax.py,sha256=0lQA0UEOs4mu8CMruZ3VLhImxQVXJWhPsAKUsYA7vj8,403
71
+ liger_kernel/transformers/swiglu.py,sha256=LZ8YeLIdv2k46JleZMjzubGk98smt6t780kSgcVLsQk,3454
72
+ liger_kernel/transformers/tiled_mlp.py,sha256=J51-kpzwikDMMhT5bX-RZCKMaXBK6zZc1bhgRYTK5F0,4651
73
+ liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
74
+ liger_kernel/transformers/tvd.py,sha256=XrRfyJIqN6HFxXk8MYyFVZM1OLz3mtSbRZvWfZ_JerQ,450
75
+ liger_kernel/transformers/experimental/__init__.py,sha256=oQqk-f32JYgWEP9DJCj6ty6bbJSGrdXsFDQFwGeX6vI,127
76
+ liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1wcRgDSyjBMv5i1a7BrDPDQw,881
77
+ liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
+ liger_kernel/transformers/model/falcon_h1.py,sha256=heUZ4wUt2ATmtBtmv8Rcro3pQl6fV9T0pburjTTW7os,5004
79
+ liger_kernel/transformers/model/gemma.py,sha256=pAri4PYpknsFfkvyo8Ez2NNlqrUDW-KkExUXTGZAcH4,10621
80
+ liger_kernel/transformers/model/gemma2.py,sha256=qa9Ok42vFojVGNmASTH3Ek566Vu507kjd--ZpZDKX9M,12024
81
+ liger_kernel/transformers/model/gemma3.py,sha256=mEV3Kuy-dqfTk_b899Vb-InuD4_DvwH0nm5xgbG-0MM,14911
82
+ liger_kernel/transformers/model/glm4.py,sha256=bSp22iPIjsli4-c_usUOsyh1Bs2gIK8X6ynS0azseUs,5900
83
+ liger_kernel/transformers/model/glm4v.py,sha256=dd-BQpccDCp1SbIxcJ5rG8xcwYQK3KOv1Tgm9TGnZc4,6594
84
+ liger_kernel/transformers/model/glm4v_moe.py,sha256=zKhMdOOrRhlrvCSFaeVYfddL1ubpY8edEO91TN81n98,7135
85
+ liger_kernel/transformers/model/internvl.py,sha256=OOutracs9qrPHSU7FVYar08yinvGrHQVPvo39JEws6w,6473
86
+ liger_kernel/transformers/model/llama.py,sha256=kqZeONzwTBzudoChlKMzq1w23BtYGbxWZC1l1V__JTw,13410
87
+ liger_kernel/transformers/model/llama4.py,sha256=PfkynGVI0xxMs3EtyYpCgaALI6stu25OIrTIymE-pvg,4853
88
+ liger_kernel/transformers/model/llava.py,sha256=yoADM_BuIEummtTDiwWqjfUjXUMZD78VJzS0TRj5GJ4,15687
89
+ liger_kernel/transformers/model/loss_utils.py,sha256=mAV6NsE1xR2smQMlr_n9afh4ek3BhIfieZdTn1Z-9Fw,2836
90
+ liger_kernel/transformers/model/mistral.py,sha256=OcwOzVDMwwDbVccVPv-AaocznzWwzLT3aRaKK5SMaAg,6030
91
+ liger_kernel/transformers/model/mixtral.py,sha256=YcBDoTEJDgLFJ_RTo180DYGxR8D5Ad9-idumif7kCPE,12130
92
+ liger_kernel/transformers/model/mllama.py,sha256=vAHwCm63sn4kpAY0rDGf_N0HR7KRTBVpBYDVTPOaZTg,12079
93
+ liger_kernel/transformers/model/olmo2.py,sha256=-h2bUOeuPfY1MdShdRvq5_wFDHKP4PEimgIl0fL-BT4,5902
94
+ liger_kernel/transformers/model/output_classes.py,sha256=0BGXVR4dYQpSHLkSqpRoXuHMryrceGSlTYRu6pvd8ZY,4542
95
+ liger_kernel/transformers/model/paligemma.py,sha256=r0smHLADkEwfLS6d6ArWoSWEeLt2d_8pmgOO5F04b1o,20793
96
+ liger_kernel/transformers/model/phi3.py,sha256=PT7Kw6yySg-7TsssWfi82eVMN3SWujCqzCqHigAdfeQ,4574
97
+ liger_kernel/transformers/model/qwen2.py,sha256=ojqdJpD3A9A5uCS0N_rSq8gyNYWSsHfuvx3Z3ObC7ss,10686
98
+ liger_kernel/transformers/model/qwen2_5_vl.py,sha256=FbIZDcg9cOr4PtBLNN8yVubN-gu2clndjSIzfi8NMos,6894
99
+ liger_kernel/transformers/model/qwen2_vl.py,sha256=967Ex4Scm0ehhiVxOtjwfj396nD9xkAwFwHcoURH6-o,6578
100
+ liger_kernel/transformers/model/qwen3.py,sha256=1fvioVmq5CRZSIuTd7uuLet-fti9ee3r8eLibvfNTcQ,5769
101
+ liger_kernel/transformers/model/qwen3_moe.py,sha256=yljJO4kyeM5U2Q4pXH3Mmq71ZFEC_Z73qgBx1-an-o8,6457
102
+ liger_kernel/transformers/model/qwen3_next.py,sha256=TayfD91GVLA1-fJwtVl6vMZgkUTYLQYURMRGBdCtnFc,6331
103
+ liger_kernel/transformers/model/qwen3_vl.py,sha256=sUIdJ-32IlFm_4pHv6PpLgVafqBS0QeJm_91tY67NdY,6646
104
+ liger_kernel/transformers/model/qwen3_vl_moe.py,sha256=CJEFcwBqItSEw9NA0mhEozlDTgIuJQ6VTjgkh5iLZ78,4856
105
+ liger_kernel/transformers/model/smollm3.py,sha256=1ewDY-99UAFJEfoeqfZxDcxjkqKYUSr5b7X-E_2BLLs,8126
106
+ liger_kernel/transformers/model/smolvlm.py,sha256=yFpPKawLVo3zXzLjM7Y_T8FyRrPxVyp-YPFMM8m3k0c,6734
107
+ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
108
+ liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
109
+ liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
110
+ liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
111
+ liger_kernel_nightly-0.6.3.dev20251121010306.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
112
+ liger_kernel_nightly-0.6.3.dev20251121010306.dist-info/METADATA,sha256=HgCaZORIkj1lSLvj1vsjOJ0r9ouWZ-lqPCQ3JrJJMFU,24777
113
+ liger_kernel_nightly-0.6.3.dev20251121010306.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
114
+ liger_kernel_nightly-0.6.3.dev20251121010306.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
115
+ liger_kernel_nightly-0.6.3.dev20251121010306.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
116
+ liger_kernel_nightly-0.6.3.dev20251121010306.dist-info/RECORD,,
@@ -1,95 +0,0 @@
1
- liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- liger_kernel/env_report.py,sha256=uhdEC8OydxoZlb7B6YYcAaBF3crGFdIck-4cxaW4NJY,1728
3
- liger_kernel/utils.py,sha256=BQleeZWHSZPNuPcYcoZTOp1kcNEZONZilPP5-AmjgWI,2024
4
- liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EBU1LpWU,2248
5
- liger_kernel/chunked_loss/__init__.py,sha256=ATu-xX5Fc49Cr6yBOGBRNTo593ZrU5ZCsIuvoIbJWw4,603
6
- liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
7
- liger_kernel/chunked_loss/dpo_loss.py,sha256=tapMiNdI8_ufW55iG0Ud4dmiW39gu1DzlvtoOCHrdGg,6259
8
- liger_kernel/chunked_loss/functional.py,sha256=9G3nKm-Bi7uoZRFkL8wwGMl6juDl4bSzDvTa5GHZPzg,955
9
- liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=ooR-qnZCyWJN935oHCSWLaKKKyaYERyhNczRGi1VOiw,11935
10
- liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=AA19cpv6D8mo5RbSK5GRCcZoOSnpxV_Z1eJlAsC5eic,13434
11
- liger_kernel/chunked_loss/fused_linear_preference.py,sha256=FIH85uUXAOgYx5Ax8MjFhJHVu-2pKtY7wSegd0zSyyY,18336
12
- liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=RiuK3UtRwH9T6jZ36sA8Urj-TVuOLOO2syLg_JOQapY,13437
13
- liger_kernel/chunked_loss/grpo_loss.py,sha256=kuqHkYV383sUxqJN-DMsfADHi2hxHVyKx5S24TNc8bQ,10866
14
- liger_kernel/chunked_loss/jsd_loss.py,sha256=uInjy-KtKNJs46Wk0AlMO9e3UYo33KJhoCl8KL8ypGU,7081
15
- liger_kernel/chunked_loss/kto_loss.py,sha256=llVCe6DkcpCo57seGWoMikaQVFApx764jsmSbQyqwQY,7529
16
- liger_kernel/chunked_loss/orpo_loss.py,sha256=nu9UYG16dcMw93lvHi4_hYs3Q0FK1KnlmMRj7OpYU8s,4872
17
- liger_kernel/chunked_loss/simpo_loss.py,sha256=fy2w8KbhMrBv7b1jdIeH3bBFxY52bPQPZb3KwBvmurM,5385
18
- liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- liger_kernel/ops/cross_entropy.py,sha256=e8THGnhOcy_0SbOLABx67HEM7-B8a8pG7nDKbCRpQKM,19123
20
- liger_kernel/ops/dyt.py,sha256=gCLz4S8aul8SY9nvIGaoK67aGb7U9MJRQdo3ONqmQYs,5417
21
- liger_kernel/ops/fused_linear_cross_entropy.py,sha256=5fbGhN85n3zf0uIdJ7PYHWIRzTf0VTFiS0ARtOmqIP0,11020
22
- liger_kernel/ops/fused_linear_jsd.py,sha256=CSoprxb-YcJy-YUKiTcYkxN8sb9h2kdk_iHuncvSV5c,9683
23
- liger_kernel/ops/fused_neighborhood_attention.py,sha256=vPi5xbnh6wxyZehaqo6Tuilqo2fN5SGDiONjnNmIKqs,35556
24
- liger_kernel/ops/geglu.py,sha256=axGvCIvlBzuluoAIrWTsp2iZM4BFKNInkPov8YVvH9E,4126
25
- liger_kernel/ops/group_norm.py,sha256=qD4D4lSjSgVtO52EBNLC2iTseALRgPgqXE50U2woggk,10837
26
- liger_kernel/ops/grpo_loss.py,sha256=anRnv7k1-AV3pCC6_TqP0GMg78YYUfRAJrbpx6PVhl0,9448
27
- liger_kernel/ops/jsd.py,sha256=onHp5T3MbvJaVz5Vup7Ww6EQp_HTaZeayTjJk6FgQMY,7042
28
- liger_kernel/ops/kl_div.py,sha256=ZjGdDLKWksHT9dZ0xF_TDgAkj5cuMTwwT5tr9E-_24o,8734
29
- liger_kernel/ops/layer_norm.py,sha256=vWCyOm-F2GMAilB-ozJcFeUQQLCJoTE_uiXq-_0uYuI,8356
30
- liger_kernel/ops/multi_token_attention.py,sha256=Oz_RXDp-OSS_R_HuGmaETHdAJ7Toda_70OfE7TXMUlY,7645
31
- liger_kernel/ops/qwen2vl_mrope.py,sha256=3GExhYpLgB4VUtyZyjRk8XjEur3W4EWF6HQ67ML5vBU,8481
32
- liger_kernel/ops/rms_norm.py,sha256=-rcgHwWCxlA-Syec2XhdW4jfOeCDt2r7qwjslgXFYDU,18865
33
- liger_kernel/ops/rope.py,sha256=ofmBOkUpZZO-Q8Z5B_LOFYYLD-YT-8WnJ4vGOrDYouI,8943
34
- liger_kernel/ops/softmax.py,sha256=tgORx6MK1IDDtZKqGarj0IPIVjqAIEUXXYPiinhRdtI,5864
35
- liger_kernel/ops/sparsemax.py,sha256=AeWe1xgkHJFEKWTj2vu_0hj7LztGvjqXAps-QTpCY0U,5087
36
- liger_kernel/ops/swiglu.py,sha256=KmgMjaJQnbLLgZn2nEpbwHU_xpnYRweCyrLQSVvM1vA,3015
37
- liger_kernel/ops/tvd.py,sha256=FHJtLQI95ijqgg9UtaHpMAjSCiPxB6CduPwPMcGxelc,6405
38
- liger_kernel/ops/utils.py,sha256=uoFKQqo-34N2TWQNvXMFywqGiOMMXNEVBxVojzlUAa0,3836
39
- liger_kernel/ops/experimental/embedding.py,sha256=tolj3tItkzpSb30zWqDN2_yX4ectflaQ8HMyKyFIQc8,4172
40
- liger_kernel/ops/experimental/mm_int8int2.py,sha256=TrS9lpwekrik_w5qE7AhMJD1bcq-OidjtbsW80oZ6IM,13314
41
- liger_kernel/transformers/__init__.py,sha256=0KX0rxyy0E_uNWVE0PSTzEVzKqc5KdFHtvdHhJm23Kk,7077
42
- liger_kernel/transformers/auto_model.py,sha256=0qCTRZt280Bj_LcFdzo9hlaR-BWNazawXOGgoCZjgEg,1545
43
- liger_kernel/transformers/cross_entropy.py,sha256=z3KTWQnFxr_IZaVjtYt0ZNEWQdDdYThN35xWkHlDGH0,1683
44
- liger_kernel/transformers/dyt.py,sha256=i-4GPaMrl-jab9TVI5qN0-H9qycn_mCbV82ozU4nbmU,723
45
- liger_kernel/transformers/fsdp.py,sha256=CUiyjTmjkjY7pLXQv8ly9rnzgXw6529csd9pvtJNMYc,3096
46
- liger_kernel/transformers/functional.py,sha256=7Emw7D6VPMg8hfasC33NiolvKmQVF1gV6VayKQCEWJM,7446
47
- liger_kernel/transformers/fused_linear_cross_entropy.py,sha256=O8Sg5BT81nTaY9fSGoOY9dOD9ekibwwiuXhdUHaxntQ,1742
48
- liger_kernel/transformers/fused_linear_jsd.py,sha256=bZ4otCvWBuOnA5XdQL-FzZVItJlDt-ht9e_pG7PG93E,3999
49
- liger_kernel/transformers/fused_neighborhood_attention.py,sha256=TxYDUAt9B6WSP14aJP66C_2Mbds2sSIPGnamhUSTrC8,7957
50
- liger_kernel/transformers/geglu.py,sha256=mrgqzIUVd6lN7fkDKLkw5YaESDxDtFgbot430WwPVOQ,1107
51
- liger_kernel/transformers/group_norm.py,sha256=6qMAWOprr4SzP0YhNVNGQIBpM5aUHplUD2VuGJrMBz0,2173
52
- liger_kernel/transformers/grpo_loss.py,sha256=uAkUNKSnUGEOqa82L9w2e6AI1kcmG8K45-QxyaT8zhM,3897
53
- liger_kernel/transformers/jsd.py,sha256=DGqRnxIZxsvxo0_tbbxX3b-sDbDjC_yKufyRIHCcScY,2979
54
- liger_kernel/transformers/kl_div.py,sha256=WLffFbh1EExD2Eb1F7lN11fo9JJC-0751WJjZAF1Fj8,409
55
- liger_kernel/transformers/layer_norm.py,sha256=c9pk3PEasOKYR0rhe5e5nNrnYKVCEW4VC8S6LpCq9EQ,906
56
- liger_kernel/transformers/monkey_patch.py,sha256=FacsJGO8MUbn1I7HttRAlDGhLtD8jjRTXSAwvI0iPrg,77754
57
- liger_kernel/transformers/multi_token_attention.py,sha256=l9VDICK0dfmifUDW668hGscP8AHq2rYcM2oGUa3baRQ,1751
58
- liger_kernel/transformers/qwen2vl_mrope.py,sha256=5EwSqrMdsL9MYspeBMXBsNJKvH0MOmRrtJXAJlnnlOI,1047
59
- liger_kernel/transformers/rms_norm.py,sha256=vkekcvTeWY8vL4H6hg3t0XeY0Ew_3OFMPHuzqlxPPVw,2719
60
- liger_kernel/transformers/rope.py,sha256=ZTrTORSAyfcFIKjk6XEeYmk4ROH7xXED9L4g2NFntlE,999
61
- liger_kernel/transformers/softmax.py,sha256=yadlAgE4V2JByMwrDDa2s5SUBp8Jgd57xwnVvAWoBaI,264
62
- liger_kernel/transformers/sparsemax.py,sha256=0lQA0UEOs4mu8CMruZ3VLhImxQVXJWhPsAKUsYA7vj8,403
63
- liger_kernel/transformers/swiglu.py,sha256=LZ8YeLIdv2k46JleZMjzubGk98smt6t780kSgcVLsQk,3454
64
- liger_kernel/transformers/trainer_integration.py,sha256=W3ON51O5GkyzNJsItz0y5rKx-uy2f2cFfveZpqbUdhw,123
65
- liger_kernel/transformers/tvd.py,sha256=XrRfyJIqN6HFxXk8MYyFVZM1OLz3mtSbRZvWfZ_JerQ,450
66
- liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1wcRgDSyjBMv5i1a7BrDPDQw,881
67
- liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
- liger_kernel/transformers/model/gemma.py,sha256=gvP-9zZ1e-DQD06qltWmRhiJClJDtkMQL1UrPMMZZGQ,9730
69
- liger_kernel/transformers/model/gemma2.py,sha256=ORmzklEAMpk93nToRo4d_ZJbM4ScVE2szczsEL4hw7w,11019
70
- liger_kernel/transformers/model/gemma3.py,sha256=JI4jj9K660HeRsofB6cpkCHBQ0OsazElArRtKUehUmw,15945
71
- liger_kernel/transformers/model/glm4.py,sha256=GlnEhdGJuDIqp2R9qC54biY3HwV1tWmfpJm6ijoAsrM,5257
72
- liger_kernel/transformers/model/llama.py,sha256=LcIxVfF0PXXWHBVJa6Ody_5fAtIpxQcI4jC_j-o51fU,12503
73
- liger_kernel/transformers/model/llava.py,sha256=bLCioday_SOm69ogMDBhy_4UsVkH2-BSl93-EXY6-7I,15076
74
- liger_kernel/transformers/model/loss_utils.py,sha256=WWAMdiONPaXpIvxyOim_0igLrYh0yyOok5Q9_L9xvZw,1787
75
- liger_kernel/transformers/model/mistral.py,sha256=okKkyashfFLfhjIT--f3JY6JHOslOtDI8U1dlpBC2Zs,5565
76
- liger_kernel/transformers/model/mixtral.py,sha256=VY-y73IyjcCyWyI7ahxXLw0fJrhgjYfr1xwRYtsHX0o,11396
77
- liger_kernel/transformers/model/mllama.py,sha256=my29NXk-p6ckQaP8qDIN8e318yI_9mQZHt38MV3SqLY,11280
78
- liger_kernel/transformers/model/olmo2.py,sha256=6L_bo-ZUgO1lYppdJneOtYxNIylQKS6BiGp13g7Uq9E,5259
79
- liger_kernel/transformers/model/paligemma.py,sha256=xuIx3oOwTgftU3jqLfWOxUxgCLBNJh0yNC21an9qDjo,18773
80
- liger_kernel/transformers/model/phi3.py,sha256=m-MD_OuTaYMGZhHOvl-RHOVEObrL8tL5cBv3VTNd4F0,10376
81
- liger_kernel/transformers/model/qwen2.py,sha256=SdN7V-MI3eX9s2DAFRvC1g-G146uG_5n1fnNdY9QwYk,9658
82
- liger_kernel/transformers/model/qwen2_5_vl.py,sha256=zEVVwotCXnAm3RRc8-1Nc8uitSWrwW4B9dYY2uOZDwg,6331
83
- liger_kernel/transformers/model/qwen2_vl.py,sha256=5vK-vtCDpKZ2w33xYp2BS8kQYWUbKMqaiKvQcI27Mss,5884
84
- liger_kernel/transformers/model/qwen3.py,sha256=w2jBHuK9kK9EmOr5dnEIXNQXUgUSV_sJUkXSEwxLPHs,4885
85
- liger_kernel/transformers/model/qwen3_moe.py,sha256=BkpfFH3fOH0yRfA7LF-AoHTLut2GV0Y4MOlkiIYewfU,5511
86
- liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7HHWHwku25A-GYL0WU,193
87
- liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
88
- liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
89
- liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
90
- liger_kernel_nightly-0.5.10.dev20250624183504.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
91
- liger_kernel_nightly-0.5.10.dev20250624183504.dist-info/METADATA,sha256=QSqkcNekBuJTkV8QZdt5x9iGR-5Z4G_BIbqhCRDvkIs,24358
92
- liger_kernel_nightly-0.5.10.dev20250624183504.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
93
- liger_kernel_nightly-0.5.10.dev20250624183504.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
94
- liger_kernel_nightly-0.5.10.dev20250624183504.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
95
- liger_kernel_nightly-0.5.10.dev20250624183504.dist-info/RECORD,,