PyPI - liger-kernel-nightly - Versions diffs - 0.5.8.dev20250416185644__py3-none-any.whl → 0.5.8.dev20250422210723__py3-none-any.whl - Mend

liger-kernel-nightly 0.5.8.dev20250416185644__py3-none-any.whl → 0.5.8.dev20250422210723__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

liger_kernel/chunked_loss/dpo_loss.py CHANGED Viewed

@@ -68,6 +68,7 @@ class LigerFusedLinearDPOFunction(LigerFusedLinearPreferenceBase):
         compute_nll_loss=False,
         compiled=True,
         use_ref_model=True,
+        average_log_prob=False,
         chunk_size=1,
     ):
         """
@@ -85,6 +86,7 @@ class LigerFusedLinearDPOFunction(LigerFusedLinearPreferenceBase):
             compute_nll_loss (bool): Whether to compute the NLL loss
             compiled (bool): Whether to use torch compile
             use_ref_model (bool): Whether to use a reference model
+            average_log_prob (bool): Whether to average the log probability per non-masked token
             chunk_size (int): Size of chunks for processing.
         Returns:
             torch.Tensor: Computed loss
@@ -104,13 +106,14 @@ class LigerFusedLinearDPOFunction(LigerFusedLinearPreferenceBase):
             ref_input=ref_input,
             ref_weight=ref_weight,
             ref_bias=ref_bias,
+            average_log_prob=average_log_prob,
             chunk_size=chunk_size,
         )
     @staticmethod
     def backward(ctx, *grad_output):
         grads = LigerFusedLinearPreferenceBase.backward(ctx, grad_output)[:4]
-        return *grads, None, None, None, None, None, None, None, None, None
+        return *grads, None, None, None, None, None, None, None, None, None, None
 class LigerFusedLinearDPOLoss(torch.nn.Module):
@@ -125,6 +128,7 @@ class LigerFusedLinearDPOLoss(torch.nn.Module):
         compute_nll_loss: bool = False,
         compiled: bool = True,
         use_ref_model: bool = True,
+        average_log_prob: bool = True,
         chunk_size: int = 1,
     ):
         """
@@ -134,6 +138,7 @@ class LigerFusedLinearDPOLoss(torch.nn.Module):
             compute_nll_loss (bool): Whether to compute the NLL loss.
             compiled (bool): Whether to use the torch compiled kernel.
             use_ref_model (bool): Whether to use a reference model for the DPO loss.
+            average_log_prob (bool): Whether to average the log probability per non-masked token.
             chunk_size (int): Size of chunks for processing.
         """
         super().__init__()
@@ -142,6 +147,7 @@ class LigerFusedLinearDPOLoss(torch.nn.Module):
         self.compute_nll_loss = compute_nll_loss
         self.compiled = compiled
         self.use_ref_model = use_ref_model
+        self.average_log_prob = average_log_prob
         self.chunk_size = chunk_size
     def forward(
@@ -167,5 +173,6 @@ class LigerFusedLinearDPOLoss(torch.nn.Module):
             self.compute_nll_loss,
             self.compiled,
             self.use_ref_model,
+            self.average_log_prob,
             self.chunk_size,
         )

{liger_kernel_nightly-0.5.8.dev20250416185644.dist-info → liger_kernel_nightly-0.5.8.dev20250422210723.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: liger_kernel_nightly
-Version: 0.5.8.dev20250416185644
+Version: 0.5.8.dev20250422210723
 Summary: Efficient Triton kernels for LLM Training
 License: BSD 2-CLAUSE LICENSE
         Copyright 2024 LinkedIn Corporation

{liger_kernel_nightly-0.5.8.dev20250416185644.dist-info → liger_kernel_nightly-0.5.8.dev20250422210723.dist-info}/RECORD RENAMED Viewed

@@ -4,7 +4,7 @@ liger_kernel/utils.py,sha256=178Hn8uD-VauDT6FjqMyXLbKLod8ObIpaTtapHwfEK0,1861
 liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EBU1LpWU,2248
 liger_kernel/chunked_loss/__init__.py,sha256=ATu-xX5Fc49Cr6yBOGBRNTo593ZrU5ZCsIuvoIbJWw4,603
 liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
-liger_kernel/chunked_loss/dpo_loss.py,sha256=xZwGqS04si9zXyob95SAdalC-hajZg8fWINqiqffN8k,5855
+liger_kernel/chunked_loss/dpo_loss.py,sha256=Xypt4FoTSmAnJE4SWtsCv4aNHK4ToR1LonUQtCTEuHQ,6258
 liger_kernel/chunked_loss/functional.py,sha256=9G3nKm-Bi7uoZRFkL8wwGMl6juDl4bSzDvTa5GHZPzg,955
 liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=ooR-qnZCyWJN935oHCSWLaKKKyaYERyhNczRGi1VOiw,11935
 liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=AA19cpv6D8mo5RbSK5GRCcZoOSnpxV_Z1eJlAsC5eic,13434
@@ -74,9 +74,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
 liger_kernel/transformers/trainer/orpo_trainer.py,sha256=pdekW7l6Qg_aqa5SYKYlSWUF8m3lkOFvFLcIMEHrz9s,8338
 liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
 liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
-liger_kernel_nightly-0.5.8.dev20250416185644.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
-liger_kernel_nightly-0.5.8.dev20250416185644.dist-info/METADATA,sha256=DMyDK7rTzTSE8a03KwKq6MmT6aHmPX3XIuhShff4Qgs,23297
-liger_kernel_nightly-0.5.8.dev20250416185644.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
-liger_kernel_nightly-0.5.8.dev20250416185644.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
-liger_kernel_nightly-0.5.8.dev20250416185644.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
-liger_kernel_nightly-0.5.8.dev20250416185644.dist-info/RECORD,,
+liger_kernel_nightly-0.5.8.dev20250422210723.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
+liger_kernel_nightly-0.5.8.dev20250422210723.dist-info/METADATA,sha256=aSh18zXYcQy1fb3OW8Q-Q9_DYczeWXULpNDET3PCbfg,23297
+liger_kernel_nightly-0.5.8.dev20250422210723.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
+liger_kernel_nightly-0.5.8.dev20250422210723.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+liger_kernel_nightly-0.5.8.dev20250422210723.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
+liger_kernel_nightly-0.5.8.dev20250422210723.dist-info/RECORD,,

{liger_kernel_nightly-0.5.8.dev20250416185644.dist-info → liger_kernel_nightly-0.5.8.dev20250422210723.dist-info}/LICENSE RENAMED Viewed

File without changes

{liger_kernel_nightly-0.5.8.dev20250416185644.dist-info → liger_kernel_nightly-0.5.8.dev20250422210723.dist-info}/NOTICE RENAMED Viewed

File without changes

{liger_kernel_nightly-0.5.8.dev20250416185644.dist-info → liger_kernel_nightly-0.5.8.dev20250422210723.dist-info}/WHEEL RENAMED Viewed

File without changes

{liger_kernel_nightly-0.5.8.dev20250416185644.dist-info → liger_kernel_nightly-0.5.8.dev20250422210723.dist-info}/top_level.txt RENAMED Viewed

File without changes

liger-kernel-nightly 0.5.8.dev20250416185644__py3-none-any.whl → 0.5.8.dev20250422210723__py3-none-any.whl

liger-kernel-nightly 0.5.8.dev20250416185644__py3-none-any.whl → 0.5.8.dev20250422210723__py3-none-any.whl