liger-kernel-nightly 0.5.1.dev20241210093009__tar.gz → 0.5.1.dev20241210172102__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.1.dev20241210093009/src/liger_kernel_nightly.egg-info → liger_kernel_nightly-0.5.1.dev20241210172102}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/qwen2vl_mrope.py +13 -12
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/qwen2vl_mrope.py +2 -2
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102/src/liger_kernel_nightly.egg-info}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/README.md +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.1.dev20241210093009"
|
7
|
+
version = "0.5.1.dev20241210172102"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -10,6 +10,7 @@ def _triton_qwen2vl_mrope(
|
|
10
10
|
cos,
|
11
11
|
sin,
|
12
12
|
sl,
|
13
|
+
bs: tl.constexpr,
|
13
14
|
n_qh: tl.constexpr,
|
14
15
|
n_kh: tl.constexpr,
|
15
16
|
hd: tl.constexpr,
|
@@ -41,13 +42,12 @@ def _triton_qwen2vl_mrope(
|
|
41
42
|
t_end = mrope_section_t
|
42
43
|
h_end = t_end + mrope_section_h
|
43
44
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
w_sin = h_sin + sl * hd
|
45
|
+
t_cos = cos + pid * hd
|
46
|
+
h_cos = t_cos + bs * sl * hd
|
47
|
+
w_cos = h_cos + bs * sl * hd
|
48
|
+
t_sin = sin + pid * hd
|
49
|
+
h_sin = t_sin + bs * sl * hd
|
50
|
+
w_sin = h_sin + bs * sl * hd
|
51
51
|
|
52
52
|
cos_offsets = tl.arange(0, pad_hd // 2)
|
53
53
|
t_mask = cos_offsets < t_end
|
@@ -151,6 +151,7 @@ def qwen2vl_mrope_forward(q, k, cos, sin, mrope_section):
|
|
151
151
|
cos,
|
152
152
|
sin,
|
153
153
|
seq_len,
|
154
|
+
batch_size,
|
154
155
|
n_q_head,
|
155
156
|
n_kv_head,
|
156
157
|
head_dim,
|
@@ -189,6 +190,7 @@ def qwen2vl_mrope_backward(dq, dk, cos, sin, mrope_section):
|
|
189
190
|
cos,
|
190
191
|
sin,
|
191
192
|
seq_len,
|
193
|
+
batch_size,
|
192
194
|
n_q_head,
|
193
195
|
n_kv_head,
|
194
196
|
head_dim,
|
@@ -216,8 +218,8 @@ class LigerQwen2VLMRopeFunction(torch.autograd.Function):
|
|
216
218
|
"""
|
217
219
|
q size: (bsz, n_q_head, seq_len, head_dim)
|
218
220
|
k size: (bsz, n_kv_head, seq_len, head_dim)
|
219
|
-
cos size: (3,
|
220
|
-
sin size: (3,
|
221
|
+
cos size: (3, bsz, seq_len, head_dim)
|
222
|
+
sin size: (3, bsz, seq_len, head_dim)
|
221
223
|
"""
|
222
224
|
q, k, cos, sin = qwen2vl_mrope_forward(q, k, cos, sin, mrope_section)
|
223
225
|
ctx.save_for_backward(cos, sin)
|
@@ -228,10 +230,9 @@ class LigerQwen2VLMRopeFunction(torch.autograd.Function):
|
|
228
230
|
"""
|
229
231
|
dq size: (bsz, n_q_head, seq_len, head_dim)
|
230
232
|
dk size: (bsz, n_kv_head, seq_len, head_dim)
|
231
|
-
cos size: (3,
|
232
|
-
sin size: (3,
|
233
|
+
cos size: (3, bsz, seq_len, head_dim)
|
234
|
+
sin size: (3, bsz, seq_len, head_dim)
|
233
235
|
"""
|
234
|
-
|
235
236
|
cos, sin = ctx.saved_tensors
|
236
237
|
mrope_section = ctx.mrope_section
|
237
238
|
dq, dk = qwen2vl_mrope_backward(dq, dk, cos, sin, mrope_section)
|
@@ -8,8 +8,8 @@ def liger_multimodal_rotary_pos_emb(q, k, cos, sin, mrope_section, unsqueeze_dim
|
|
8
8
|
Args:
|
9
9
|
q (torch.Tensor): The query tensor of shape (bsz, n_q_head, seq_len, head_dim).
|
10
10
|
k (torch.Tensor): The key tensor of shape (bsz, n_kv_head, seq_len, head_dim).
|
11
|
-
cos (torch.Tensor): The cosine tensor of shape (3,
|
12
|
-
sin (torch.Tensor): The sine tensor of shape (3,
|
11
|
+
cos (torch.Tensor): The cosine tensor of shape (3, bsz, seq_len, head_dim).
|
12
|
+
sin (torch.Tensor): The sine tensor of shape (3, bsz, seq_len, head_dim).
|
13
13
|
mrope_section (List[int]): The multimodal rope section for channel dimension of temporal, height and width in rope calculation.
|
14
14
|
unsqueeze_dim (int, optional): The dimension to unsqueeze. Defaults to 1.
|
15
15
|
|
File without changes
|
{liger_kernel_nightly-0.5.1.dev20241210093009 → liger_kernel_nightly-0.5.1.dev20241210172102}/NOTICE
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|