PyPI - liger-kernel-nightly - Versions diffs - 0.4.1.dev20241115012952__py3-none-any.whl → 0.4.1.dev20241115210858__py3-none-any.whl - Mend

liger-kernel-nightly 0.4.1.dev20241115012952__py3-none-any.whl → 0.4.1.dev20241115210858__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

liger_kernel/ops/fused_linear_cross_entropy.py CHANGED Viewed

@@ -229,6 +229,7 @@ class LigerFusedLinearCrossEntropyFunction(torch.autograd.Function):
         label_smoothing (float): The amount of smoothing when computing the loss, where 0.0 means no smoothing.
         reduction: reduction to apply
         """
         loss, grad_input, grad_weight, grad_bias = fused_linear_cross_entropy_forward(
             _input,
             weight,

liger_kernel/transformers/model/qwen2_vl.py CHANGED Viewed

@@ -1,7 +1,9 @@
 from typing import List, Optional, Tuple, Union
 import torch
+from packaging import version
 from torch.nn import CrossEntropyLoss
+from transformers import __version__ as transformers_version
 from transformers.models.qwen2_vl.modeling_qwen2_vl import (
     _CONFIG_FOR_DOC,
     QWEN2_VL_INPUTS_DOCSTRING,
@@ -80,8 +82,6 @@ def lce_forward(
     >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
     "The image shows a street scene with a red stop sign in the foreground. In the background, there is a large red gate with Chinese characters ..."
     ```"""
-    # FIXME: The code is outdated and not compatible with transformer >= 4.46.1
     output_attentions = (
         output_attentions
         if output_attentions is not None
@@ -100,27 +100,53 @@ def lce_forward(
         inputs_embeds = self.model.embed_tokens(input_ids)
         if pixel_values is not None:
             pixel_values = pixel_values.type(self.visual.get_dtype())
-            image_embeds = self.visual(pixel_values, grid_thw=image_grid_thw).to(
-                inputs_embeds.device
+            image_embeds = self.visual(pixel_values, grid_thw=image_grid_thw)
+            n_image_tokens = (input_ids == self.config.image_token_id).sum().item()
+            n_image_features = image_embeds.shape[0]
+            if n_image_tokens != n_image_features:
+                raise ValueError(
+                    f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {n_image_features}"
+                )
+            image_mask = (
+                (input_ids == self.config.image_token_id)
+                .unsqueeze(-1)
+                .expand_as(inputs_embeds)
+                .to(inputs_embeds.device)
             )
-            image_mask = input_ids == self.config.image_token_id
-            if self.training:
-                inputs_embeds = inputs_embeds.clone()
-            inputs_embeds[image_mask] = image_embeds
+            image_embeds = image_embeds.to(inputs_embeds.device, inputs_embeds.dtype)
+            inputs_embeds = inputs_embeds.masked_scatter(image_mask, image_embeds)
         if pixel_values_videos is not None:
             pixel_values_videos = pixel_values_videos.type(self.visual.get_dtype())
-            video_embeds = self.visual(pixel_values_videos, grid_thw=video_grid_thw).to(
-                inputs_embeds.device
+            video_embeds = self.visual(pixel_values_videos, grid_thw=video_grid_thw)
+            n_video_tokens = (input_ids == self.config.video_token_id).sum().item()
+            n_video_features = video_embeds.shape[0]
+            if n_video_tokens != n_video_features:
+                raise ValueError(
+                    f"Video features and video tokens do not match: tokens: {n_video_tokens}, features {n_video_features}"
+                )
+            video_mask = (
+                (input_ids == self.config.video_token_id)
+                .unsqueeze(-1)
+                .expand_as(inputs_embeds)
+                .to(inputs_embeds.device)
             )
-            video_mask = input_ids == self.config.video_token_id
-            inputs_embeds[video_mask] = video_embeds
+            video_embeds = video_embeds.to(inputs_embeds.device, inputs_embeds.dtype)
+            inputs_embeds = inputs_embeds.masked_scatter(video_mask, video_embeds)
         if attention_mask is not None:
             attention_mask = attention_mask.to(inputs_embeds.device)
-    # The code is copied from https://github.com/huggingface/transformers/pull/33487
-    if position_ids is None and input_ids is not None:
-        position_ids, _ = self.get_rope_index(
-            input_ids, image_grid_thw, video_grid_thw, attention_mask
-        )
+    if version.parse(transformers_version) > version.parse("4.46.2"):
+        # NOTE: this bug fix for qwen2-vl is not applied until transformers 4.47.0
+        # https://github.com/huggingface/transformers/issues/33401
+        # While correct, this breaks equivalence with past versions of Qwen2-VL from
+        # transformers and leads to failed tests or users noticing differences in results.
+        # TODO: remove above conditional when liger drops support for transformers<4.47.0
+        if position_ids is None and input_ids is not None:
+            position_ids, _ = self.get_rope_index(
+                input_ids, image_grid_thw, video_grid_thw, attention_mask
+            )
     outputs = self.model(
         input_ids=None,

{liger_kernel_nightly-0.4.1.dev20241115012952.dist-info → liger_kernel_nightly-0.4.1.dev20241115210858.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: liger_kernel_nightly
-Version: 0.4.1.dev20241115012952
+Version: 0.4.1.dev20241115210858
 Summary: Efficient Triton kernels for LLM Training
 License: BSD 2-CLAUSE LICENSE
         Copyright 2024 LinkedIn Corporation

{liger_kernel_nightly-0.4.1.dev20241115012952.dist-info → liger_kernel_nightly-0.4.1.dev20241115210858.dist-info}/RECORD RENAMED Viewed

@@ -5,7 +5,7 @@ liger_kernel/chunked_loss/fused_linear_preference.py,sha256=ayx-dmAx1TW9sThHJ_wU
 liger_kernel/chunked_loss/orpo_loss.py,sha256=DNifPpzGV_t3dfOPlPy2XKDM6M1Qne0kCbIPztvFY9U,2179
 liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 liger_kernel/ops/cross_entropy.py,sha256=sfUb7-jIZp0EKXjg1DYy2Wdzw_Mg-mHmGoR5bpdm4tw,15526
-liger_kernel/ops/fused_linear_cross_entropy.py,sha256=JPiQ0TgPjtQ-3F5ovC0b5ZnBk067XUmzyNuGO3KZv44,9963
+liger_kernel/ops/fused_linear_cross_entropy.py,sha256=ib7M3AjJE164yMfuS9R39k-5qnDgYOXptIT146lqYbg,9964
 liger_kernel/ops/fused_linear_jsd.py,sha256=5D_obamh08lGGTMyh85kBJD_aNjPhOYf4-TmCZ6m4s4,9626
 liger_kernel/ops/geglu.py,sha256=MQL4zyzneZqZYUGPvb1QjI_EYT9_pKfSDgR25WD9jrI,4127
 liger_kernel/ops/group_norm.py,sha256=VaRErVJGR4JqgXXvuIjNGTn3E2egjLtU1y3ymwIf4d8,10961
@@ -44,12 +44,12 @@ liger_kernel/transformers/model/mixtral.py,sha256=nyDS1dBpsOXYC2DuW59Hgu7ZrGftrH
 liger_kernel/transformers/model/mllama.py,sha256=mesNCgj0Ea1O-fqRD4LVxDJ1CR2abY_zAzK_bfVzkiU,11222
 liger_kernel/transformers/model/phi3.py,sha256=xUZPlaPKwknLjHc3uUW3EPodm1h0vD3G7Qnhh51v-Io,10332
 liger_kernel/transformers/model/qwen2.py,sha256=EyhSSzQOskGjSnCsKMZpd1s5IAIlHd5PBO3q0MoCs00,9619
-liger_kernel/transformers/model/qwen2_vl.py,sha256=j6xAhp9AG195dsZK5f8dFYVM9uKtWApZrggT5Y08jn4,7055
+liger_kernel/transformers/model/qwen2_vl.py,sha256=bIQe2bWiY--G84FhCD29Gdi64_qHP6vbcGsK6vKysQE,8547
 liger_kernel/triton/__init__.py,sha256=yfRe0zMb47QnqjecZWG7LnanfCTzeku7SgWRAwNVmzU,101
 liger_kernel/triton/monkey_patch.py,sha256=5BcGKTtdqeYchypBIBopGIWPx1-cFALz7sOKoEsqXJ0,1584
-liger_kernel_nightly-0.4.1.dev20241115012952.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
-liger_kernel_nightly-0.4.1.dev20241115012952.dist-info/METADATA,sha256=Yl1zBjC9UmijcLm5KFqVGR8zVQGPWRX2G1g99RE6UPw,21556
-liger_kernel_nightly-0.4.1.dev20241115012952.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
-liger_kernel_nightly-0.4.1.dev20241115012952.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-liger_kernel_nightly-0.4.1.dev20241115012952.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
-liger_kernel_nightly-0.4.1.dev20241115012952.dist-info/RECORD,,
+liger_kernel_nightly-0.4.1.dev20241115210858.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
+liger_kernel_nightly-0.4.1.dev20241115210858.dist-info/METADATA,sha256=VsDMgGO6VdbcC6qFTtPSALLozMM_bwcOl-MgZTzZKLY,21556
+liger_kernel_nightly-0.4.1.dev20241115210858.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
+liger_kernel_nightly-0.4.1.dev20241115210858.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+liger_kernel_nightly-0.4.1.dev20241115210858.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
+liger_kernel_nightly-0.4.1.dev20241115210858.dist-info/RECORD,,

{liger_kernel_nightly-0.4.1.dev20241115012952.dist-info → liger_kernel_nightly-0.4.1.dev20241115210858.dist-info}/LICENSE RENAMED Viewed

File without changes

{liger_kernel_nightly-0.4.1.dev20241115012952.dist-info → liger_kernel_nightly-0.4.1.dev20241115210858.dist-info}/NOTICE RENAMED Viewed

File without changes

{liger_kernel_nightly-0.4.1.dev20241115012952.dist-info → liger_kernel_nightly-0.4.1.dev20241115210858.dist-info}/WHEEL RENAMED Viewed

File without changes

{liger_kernel_nightly-0.4.1.dev20241115012952.dist-info → liger_kernel_nightly-0.4.1.dev20241115210858.dist-info}/top_level.txt RENAMED Viewed

File without changes

liger-kernel-nightly 0.4.1.dev20241115012952__py3-none-any.whl → 0.4.1.dev20241115210858__py3-none-any.whl

liger-kernel-nightly 0.4.1.dev20241115012952__py3-none-any.whl → 0.4.1.dev20241115210858__py3-none-any.whl