PyPI - liger-kernel-nightly - Versions diffs - 0.6.1.dev20250728225847__py3-none-any.whl → 0.6.1.dev20250730201330__py3-none-any.whl - Mend

liger-kernel-nightly 0.6.1.dev20250728225847__py3-none-any.whl → 0.6.1.dev20250730201330__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

liger_kernel/chunked_loss/dpo_loss.py CHANGED Viewed

@@ -13,6 +13,7 @@ class LigerFusedLinearDPOFunction(LigerFusedLinearPreferenceBase):
         ref_chosen_logps=None,
         ref_rejected_logps=None,
         beta=0.1,
+        loss_type="sigmoid",
     ):
         """
         Paper: https://arxiv.org/pdf/2305.18290
@@ -48,8 +49,50 @@ class LigerFusedLinearDPOFunction(LigerFusedLinearPreferenceBase):
         chosen_rewards = beta * chosen_logratios
         rejected_rewards = beta * rejected_logratios
-        logits_diff = beta * (chosen_logratios - rejected_logratios)
-        loss = -F.logsigmoid(logits_diff).sum() / (full_target.shape[0] // 2)
+        if loss_type == "sigmoid":
+            logits_diff = beta * (chosen_logratios - rejected_logratios)
+            loss = -F.logsigmoid(logits_diff).sum() / (full_target.shape[0] // 2)
+        elif loss_type == "apo_zero":
+            # Eqn (7) of the APO paper (https://huggingface.co/papers/2408.06266)
+            # Use this loss when you believe the chosen outputs are better than your model's default output
+            losses_chosen = 1 - F.sigmoid(beta * chosen_logratios)  # Increase chosen likelihood
+            losses_rejected = F.sigmoid(beta * rejected_logratios)
+            losses = losses_chosen + losses_rejected
+            loss = losses.sum() / (full_target.shape[0] // 2)
+        elif loss_type == "apo_down":
+            # Eqn (8) of the APO paper (https://huggingface.co/papers/2408.06266)
+            # Use this loss when you believe the chosen outputs are worse than your model's default output.
+            # Decrease chosen likelihood and decrease rejected likelihood more
+            losses_chosen = F.sigmoid(beta * chosen_logratios)
+            losses_rejected = 1 - F.sigmoid(beta * (chosen_logratios - rejected_logratios))
+            losses = losses_chosen + losses_rejected
+            loss = losses.sum() / (full_target.shape[0] // 2)
+        elif loss_type == "sppo_hard":
+            # In the paper (https://huggingface.co/papers/2405.00675), SPPO employs a soft probability approach,
+            # estimated using the PairRM score. The probability calculation is conducted outside of the trainer class.
+            # The version described here is the hard probability version, where P in Equation (4.7) of Algorithm 1 is
+            # set to 1 for the winner and 0 for the loser.
+            a = chosen_logps - ref_chosen_logps
+            b = rejected_logps - ref_rejected_logps
+            losses = (a - 0.5 / beta) ** 2 + (b + 0.5 / beta) ** 2
+            loss = losses.sum() / (full_target.shape[0] // 2)
+        elif loss_type == "nca_pair":
+            losses = (
+                -F.logsigmoid(chosen_rewards)
+                - 0.5 * F.logsigmoid(-chosen_rewards)
+                - 0.5 * F.logsigmoid(-rejected_rewards)
+            )
+            loss = losses.sum() / (full_target.shape[0] // 2)
+        else:
+            raise ValueError(
+                f"Unsupported loss_type: {loss_type}. Supported types are: sigmoid, apo_zero, apo_down, sppo_hard, nca_pair"
+            )
         return loss, chosen_rewards, rejected_rewards
     @classmethod
@@ -70,6 +113,7 @@ class LigerFusedLinearDPOFunction(LigerFusedLinearPreferenceBase):
         use_ref_model=True,
         average_log_prob=False,
         chunk_size=1,
+        loss_type="sigmoid",
     ):
         """
         Fused linear layer with DPO loss.
@@ -108,12 +152,13 @@ class LigerFusedLinearDPOFunction(LigerFusedLinearPreferenceBase):
             ref_bias=ref_bias,
             average_log_prob=average_log_prob,
             chunk_size=chunk_size,
+            loss_type=loss_type,
         )
     @staticmethod
     def backward(ctx, *grad_output):
         grads = LigerFusedLinearPreferenceBase.backward(ctx, grad_output)[:4]
-        return *grads, None, None, None, None, None, None, None, None, None, None
+        return *grads, None, None, None, None, None, None, None, None, None, None, None
 class LigerFusedLinearDPOLoss(torch.nn.Module):
@@ -130,6 +175,7 @@ class LigerFusedLinearDPOLoss(torch.nn.Module):
         use_ref_model: bool = True,
         average_log_prob: bool = False,
         chunk_size: int = 1,
+        loss_type: str = "sigmoid",
     ):
         """
         Args:
@@ -149,6 +195,10 @@ class LigerFusedLinearDPOLoss(torch.nn.Module):
         self.use_ref_model = use_ref_model
         self.average_log_prob = average_log_prob
         self.chunk_size = chunk_size
+        self.loss_type = loss_type
+        supported_loss_types = {"sigmoid", "apo_zero", "apo_down", "sppo_hard", "nca_pair"}
+        if self.loss_type not in supported_loss_types:
+            raise ValueError(f"Unsupported loss_type: {self.loss_type}. Supported types are: {supported_loss_types}")
     def forward(
         self,
@@ -175,4 +225,5 @@ class LigerFusedLinearDPOLoss(torch.nn.Module):
             self.use_ref_model,
             self.average_log_prob,
             self.chunk_size,
+            self.loss_type,
         )

{liger_kernel_nightly-0.6.1.dev20250728225847.dist-info → liger_kernel_nightly-0.6.1.dev20250730201330.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: liger_kernel_nightly
-Version: 0.6.1.dev20250728225847
+Version: 0.6.1.dev20250730201330
 Summary: Efficient Triton kernels for LLM Training
 License: BSD 2-CLAUSE LICENSE
         Copyright 2024 LinkedIn Corporation

{liger_kernel_nightly-0.6.1.dev20250728225847.dist-info → liger_kernel_nightly-0.6.1.dev20250730201330.dist-info}/RECORD RENAMED Viewed

@@ -5,7 +5,7 @@ liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EB
 liger_kernel/chunked_loss/__init__.py,sha256=J5_jNnzZ4gZmA38W5f_4oab7xMoNk1Xy-yh3X_Xlf-s,714
 liger_kernel/chunked_loss/cosine_similarity_loss.py,sha256=pZ07OQ6RI-c8uk96tDRlUXdt31-da7yWhfwircZlKRw,4198
 liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
-liger_kernel/chunked_loss/dpo_loss.py,sha256=tapMiNdI8_ufW55iG0Ud4dmiW39gu1DzlvtoOCHrdGg,6259
+liger_kernel/chunked_loss/dpo_loss.py,sha256=I83khNs3QQjuhr8U3NIOAACkbse6DNiBV-TulPZ0lXw,9006
 liger_kernel/chunked_loss/functional.py,sha256=-XPDbLml9dHmvoSU2VNTUrBDFehuzvuAGPikVetBMtI,1132
 liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=ooR-qnZCyWJN935oHCSWLaKKKyaYERyhNczRGi1VOiw,11935
 liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=AA19cpv6D8mo5RbSK5GRCcZoOSnpxV_Z1eJlAsC5eic,13434
@@ -92,9 +92,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
 liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
 liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
 liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
-liger_kernel_nightly-0.6.1.dev20250728225847.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
-liger_kernel_nightly-0.6.1.dev20250728225847.dist-info/METADATA,sha256=lwxW2aBj47ch0UzjwtNGSRE4t5D6hE0ci78NRJJfWz4,24502
-liger_kernel_nightly-0.6.1.dev20250728225847.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
-liger_kernel_nightly-0.6.1.dev20250728225847.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
-liger_kernel_nightly-0.6.1.dev20250728225847.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
-liger_kernel_nightly-0.6.1.dev20250728225847.dist-info/RECORD,,
+liger_kernel_nightly-0.6.1.dev20250730201330.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
+liger_kernel_nightly-0.6.1.dev20250730201330.dist-info/METADATA,sha256=hsqE3iGoX7WtGGruvTTrjB4G4sfkTi9UYThz_vOdwos,24502
+liger_kernel_nightly-0.6.1.dev20250730201330.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
+liger_kernel_nightly-0.6.1.dev20250730201330.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+liger_kernel_nightly-0.6.1.dev20250730201330.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
+liger_kernel_nightly-0.6.1.dev20250730201330.dist-info/RECORD,,

{liger_kernel_nightly-0.6.1.dev20250728225847.dist-info → liger_kernel_nightly-0.6.1.dev20250730201330.dist-info}/LICENSE RENAMED Viewed

File without changes

{liger_kernel_nightly-0.6.1.dev20250728225847.dist-info → liger_kernel_nightly-0.6.1.dev20250730201330.dist-info}/NOTICE RENAMED Viewed

File without changes

{liger_kernel_nightly-0.6.1.dev20250728225847.dist-info → liger_kernel_nightly-0.6.1.dev20250730201330.dist-info}/WHEEL RENAMED Viewed

File without changes

{liger_kernel_nightly-0.6.1.dev20250728225847.dist-info → liger_kernel_nightly-0.6.1.dev20250730201330.dist-info}/top_level.txt RENAMED Viewed

File without changes

liger-kernel-nightly 0.6.1.dev20250728225847__py3-none-any.whl → 0.6.1.dev20250730201330__py3-none-any.whl

liger-kernel-nightly 0.6.1.dev20250728225847__py3-none-any.whl → 0.6.1.dev20250730201330__py3-none-any.whl