liger-kernel-nightly 0.6.1.dev20250728225847__py3-none-any.whl → 0.6.1.dev20250730201330__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- liger_kernel/chunked_loss/dpo_loss.py +54 -3
- {liger_kernel_nightly-0.6.1.dev20250728225847.dist-info → liger_kernel_nightly-0.6.1.dev20250730201330.dist-info}/METADATA +1 -1
- {liger_kernel_nightly-0.6.1.dev20250728225847.dist-info → liger_kernel_nightly-0.6.1.dev20250730201330.dist-info}/RECORD +7 -7
- {liger_kernel_nightly-0.6.1.dev20250728225847.dist-info → liger_kernel_nightly-0.6.1.dev20250730201330.dist-info}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.1.dev20250728225847.dist-info → liger_kernel_nightly-0.6.1.dev20250730201330.dist-info}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.1.dev20250728225847.dist-info → liger_kernel_nightly-0.6.1.dev20250730201330.dist-info}/WHEEL +0 -0
- {liger_kernel_nightly-0.6.1.dev20250728225847.dist-info → liger_kernel_nightly-0.6.1.dev20250730201330.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,7 @@ class LigerFusedLinearDPOFunction(LigerFusedLinearPreferenceBase):
|
|
13
13
|
ref_chosen_logps=None,
|
14
14
|
ref_rejected_logps=None,
|
15
15
|
beta=0.1,
|
16
|
+
loss_type="sigmoid",
|
16
17
|
):
|
17
18
|
"""
|
18
19
|
Paper: https://arxiv.org/pdf/2305.18290
|
@@ -48,8 +49,50 @@ class LigerFusedLinearDPOFunction(LigerFusedLinearPreferenceBase):
|
|
48
49
|
chosen_rewards = beta * chosen_logratios
|
49
50
|
rejected_rewards = beta * rejected_logratios
|
50
51
|
|
51
|
-
|
52
|
-
|
52
|
+
if loss_type == "sigmoid":
|
53
|
+
logits_diff = beta * (chosen_logratios - rejected_logratios)
|
54
|
+
loss = -F.logsigmoid(logits_diff).sum() / (full_target.shape[0] // 2)
|
55
|
+
|
56
|
+
elif loss_type == "apo_zero":
|
57
|
+
# Eqn (7) of the APO paper (https://huggingface.co/papers/2408.06266)
|
58
|
+
# Use this loss when you believe the chosen outputs are better than your model's default output
|
59
|
+
losses_chosen = 1 - F.sigmoid(beta * chosen_logratios) # Increase chosen likelihood
|
60
|
+
losses_rejected = F.sigmoid(beta * rejected_logratios)
|
61
|
+
losses = losses_chosen + losses_rejected
|
62
|
+
loss = losses.sum() / (full_target.shape[0] // 2)
|
63
|
+
|
64
|
+
elif loss_type == "apo_down":
|
65
|
+
# Eqn (8) of the APO paper (https://huggingface.co/papers/2408.06266)
|
66
|
+
# Use this loss when you believe the chosen outputs are worse than your model's default output.
|
67
|
+
# Decrease chosen likelihood and decrease rejected likelihood more
|
68
|
+
losses_chosen = F.sigmoid(beta * chosen_logratios)
|
69
|
+
losses_rejected = 1 - F.sigmoid(beta * (chosen_logratios - rejected_logratios))
|
70
|
+
losses = losses_chosen + losses_rejected
|
71
|
+
loss = losses.sum() / (full_target.shape[0] // 2)
|
72
|
+
|
73
|
+
elif loss_type == "sppo_hard":
|
74
|
+
# In the paper (https://huggingface.co/papers/2405.00675), SPPO employs a soft probability approach,
|
75
|
+
# estimated using the PairRM score. The probability calculation is conducted outside of the trainer class.
|
76
|
+
# The version described here is the hard probability version, where P in Equation (4.7) of Algorithm 1 is
|
77
|
+
# set to 1 for the winner and 0 for the loser.
|
78
|
+
a = chosen_logps - ref_chosen_logps
|
79
|
+
b = rejected_logps - ref_rejected_logps
|
80
|
+
losses = (a - 0.5 / beta) ** 2 + (b + 0.5 / beta) ** 2
|
81
|
+
loss = losses.sum() / (full_target.shape[0] // 2)
|
82
|
+
|
83
|
+
elif loss_type == "nca_pair":
|
84
|
+
losses = (
|
85
|
+
-F.logsigmoid(chosen_rewards)
|
86
|
+
- 0.5 * F.logsigmoid(-chosen_rewards)
|
87
|
+
- 0.5 * F.logsigmoid(-rejected_rewards)
|
88
|
+
)
|
89
|
+
loss = losses.sum() / (full_target.shape[0] // 2)
|
90
|
+
|
91
|
+
else:
|
92
|
+
raise ValueError(
|
93
|
+
f"Unsupported loss_type: {loss_type}. Supported types are: sigmoid, apo_zero, apo_down, sppo_hard, nca_pair"
|
94
|
+
)
|
95
|
+
|
53
96
|
return loss, chosen_rewards, rejected_rewards
|
54
97
|
|
55
98
|
@classmethod
|
@@ -70,6 +113,7 @@ class LigerFusedLinearDPOFunction(LigerFusedLinearPreferenceBase):
|
|
70
113
|
use_ref_model=True,
|
71
114
|
average_log_prob=False,
|
72
115
|
chunk_size=1,
|
116
|
+
loss_type="sigmoid",
|
73
117
|
):
|
74
118
|
"""
|
75
119
|
Fused linear layer with DPO loss.
|
@@ -108,12 +152,13 @@ class LigerFusedLinearDPOFunction(LigerFusedLinearPreferenceBase):
|
|
108
152
|
ref_bias=ref_bias,
|
109
153
|
average_log_prob=average_log_prob,
|
110
154
|
chunk_size=chunk_size,
|
155
|
+
loss_type=loss_type,
|
111
156
|
)
|
112
157
|
|
113
158
|
@staticmethod
|
114
159
|
def backward(ctx, *grad_output):
|
115
160
|
grads = LigerFusedLinearPreferenceBase.backward(ctx, grad_output)[:4]
|
116
|
-
return *grads, None, None, None, None, None, None, None, None, None, None
|
161
|
+
return *grads, None, None, None, None, None, None, None, None, None, None, None
|
117
162
|
|
118
163
|
|
119
164
|
class LigerFusedLinearDPOLoss(torch.nn.Module):
|
@@ -130,6 +175,7 @@ class LigerFusedLinearDPOLoss(torch.nn.Module):
|
|
130
175
|
use_ref_model: bool = True,
|
131
176
|
average_log_prob: bool = False,
|
132
177
|
chunk_size: int = 1,
|
178
|
+
loss_type: str = "sigmoid",
|
133
179
|
):
|
134
180
|
"""
|
135
181
|
Args:
|
@@ -149,6 +195,10 @@ class LigerFusedLinearDPOLoss(torch.nn.Module):
|
|
149
195
|
self.use_ref_model = use_ref_model
|
150
196
|
self.average_log_prob = average_log_prob
|
151
197
|
self.chunk_size = chunk_size
|
198
|
+
self.loss_type = loss_type
|
199
|
+
supported_loss_types = {"sigmoid", "apo_zero", "apo_down", "sppo_hard", "nca_pair"}
|
200
|
+
if self.loss_type not in supported_loss_types:
|
201
|
+
raise ValueError(f"Unsupported loss_type: {self.loss_type}. Supported types are: {supported_loss_types}")
|
152
202
|
|
153
203
|
def forward(
|
154
204
|
self,
|
@@ -175,4 +225,5 @@ class LigerFusedLinearDPOLoss(torch.nn.Module):
|
|
175
225
|
self.use_ref_model,
|
176
226
|
self.average_log_prob,
|
177
227
|
self.chunk_size,
|
228
|
+
self.loss_type,
|
178
229
|
)
|
@@ -5,7 +5,7 @@ liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EB
|
|
5
5
|
liger_kernel/chunked_loss/__init__.py,sha256=J5_jNnzZ4gZmA38W5f_4oab7xMoNk1Xy-yh3X_Xlf-s,714
|
6
6
|
liger_kernel/chunked_loss/cosine_similarity_loss.py,sha256=pZ07OQ6RI-c8uk96tDRlUXdt31-da7yWhfwircZlKRw,4198
|
7
7
|
liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
|
8
|
-
liger_kernel/chunked_loss/dpo_loss.py,sha256=
|
8
|
+
liger_kernel/chunked_loss/dpo_loss.py,sha256=I83khNs3QQjuhr8U3NIOAACkbse6DNiBV-TulPZ0lXw,9006
|
9
9
|
liger_kernel/chunked_loss/functional.py,sha256=-XPDbLml9dHmvoSU2VNTUrBDFehuzvuAGPikVetBMtI,1132
|
10
10
|
liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=ooR-qnZCyWJN935oHCSWLaKKKyaYERyhNczRGi1VOiw,11935
|
11
11
|
liger_kernel/chunked_loss/fused_linear_ppo.py,sha256=AA19cpv6D8mo5RbSK5GRCcZoOSnpxV_Z1eJlAsC5eic,13434
|
@@ -92,9 +92,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
|
|
92
92
|
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
|
93
93
|
liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
|
94
94
|
liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
|
95
|
-
liger_kernel_nightly-0.6.1.dev20250728225847.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
96
|
-
liger_kernel_nightly-0.6.1.
|
97
|
-
liger_kernel_nightly-0.6.1.dev20250728225847.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
98
|
-
liger_kernel_nightly-0.6.1.dev20250728225847.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
99
|
-
liger_kernel_nightly-0.6.1.dev20250728225847.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
100
|
-
liger_kernel_nightly-0.6.1.dev20250728225847.dist-info/RECORD,,
|
95
|
+
liger_kernel_nightly-0.6.1.dev20250730201330.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
96
|
+
liger_kernel_nightly-0.6.1.dev20250730201330.dist-info/METADATA,sha256=hsqE3iGoX7WtGGruvTTrjB4G4sfkTi9UYThz_vOdwos,24502
|
97
|
+
liger_kernel_nightly-0.6.1.dev20250730201330.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
98
|
+
liger_kernel_nightly-0.6.1.dev20250730201330.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
99
|
+
liger_kernel_nightly-0.6.1.dev20250730201330.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
100
|
+
liger_kernel_nightly-0.6.1.dev20250730201330.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|