PyPI - liger-kernel-nightly - Versions diffs - 0.5.2.dev20241211055800__py3-none-any.whl → 0.5.2.dev20241211213024__py3-none-any.whl - Mend

liger-kernel-nightly 0.5.2.dev20241211055800-py3-none-any.whl → 0.5.2.dev20241211213024-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

liger_kernel/chunked_loss/fused_linear_preference.py CHANGED Viewed

@@ -29,7 +29,7 @@ class LigerFusedLinearPreferenceBase(torch.autograd.Function):
         compute_nll_loss=True,
         compiled=True,
         use_ref_model=False,
-        # TODO: ref input
+        ref_input=None,
         ref_weight=None,
         ref_bias=None,
         **loss_kwargs,
@@ -59,6 +59,7 @@ class LigerFusedLinearPreferenceBase(torch.autograd.Function):
             compute_nll_loss (bool): Whether to compute NLL loss.
             compiled (bool): Whether to use torch compile for chunk accumulation.
             use_ref_model (bool): Whether to use a reference model for the alignment loss.
+            ref_input (torch.Tensor): Reference input tensor. Shape: (batch_size, seq_len, hidden_size).
             ref_weight (torch.Tensor): Reference weight tensor. Shape: (vocab_size, hidden_size).
             ref_bias (torch.Tensor, optional): Reference bias tensor. Shape: (vocab_size,).
             loss_kwargs (dict): Other possible arguments that a loss function might need
@@ -92,6 +93,7 @@ class LigerFusedLinearPreferenceBase(torch.autograd.Function):
             compute_nll_loss=compute_nll_loss,
             full_target=target,
             use_ref_model=use_ref_model,
+            ref_input=ref_input,
             ref_weight=ref_weight,
             ref_bias=ref_bias,
             **loss_kwargs,
@@ -301,6 +303,7 @@ class LigerFusedLinearPreferenceBase(torch.autograd.Function):
         beta=0.1,
         compute_nll_loss=True,
         use_ref_model=False,
+        ref_input=None,
         ref_weight=None,
         ref_bias=None,
         **loss_kwargs,
@@ -319,6 +322,7 @@ class LigerFusedLinearPreferenceBase(torch.autograd.Function):
             beta (float): Weight for the preference loss.
             compute_nll_loss (bool): Whether to compute NLL loss.
             use_ref_model (bool): Whether to use a reference model for the alignment loss.
+            ref_input (torch.Tensor): Reference input tensor. Shape: (2 * chunk_size, sequence_length, hidden_size).
             ref_weight (torch.Tensor): Reference weight tensor. Shape: (vocab_size, hidden_size).
             ref_bias (torch.Tensor, optional): Reference bias tensor. Shape: (vocab_size,).
             loss_kwargs (dict): Additional arguments for the loss function.
@@ -357,7 +361,7 @@ class LigerFusedLinearPreferenceBase(torch.autograd.Function):
                     ref_rejected_logits,
                     ref_chosen_nll_loss,
                 ) = LigerFusedLinearPreferenceBase.chunk_forward(
-                    input_chunk,
+                    ref_input,
                     ref_weight,
                     target_chunk,
                     ref_bias,

{liger_kernel_nightly-0.5.2.dev20241211055800.dist-info → liger_kernel_nightly-0.5.2.dev20241211213024.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: liger_kernel_nightly
-Version: 0.5.2.dev20241211055800
+Version: 0.5.2.dev20241211213024
 Summary: Efficient Triton kernels for LLM Training
 License: BSD 2-CLAUSE LICENSE
         Copyright 2024 LinkedIn Corporation

{liger_kernel_nightly-0.5.2.dev20241211055800.dist-info → liger_kernel_nightly-0.5.2.dev20241211213024.dist-info}/RECORD RENAMED Viewed

@@ -6,7 +6,7 @@ liger_kernel/chunked_loss/cpo_loss.py,sha256=Qu1Ul2A12sp6CqIT-atPbHWFb_LLtINEA9m
 liger_kernel/chunked_loss/dpo_loss.py,sha256=H9_RRhclckHYM2sd75tgbnf8IxC_PU2JCALbgtPQvwc,4222
 liger_kernel/chunked_loss/functional.py,sha256=9Gr-YXIuEzEJkBUhDx3G2fuQayckLor7cC7svhmPML4,549
 liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=2BH6DCPjsR2zS6zcwFPcIIZRhLF8SohjGdKsAJ_301o,10222
-liger_kernel/chunked_loss/fused_linear_preference.py,sha256=vlWfaaIECWvCQhY9PM7zRI0vKThIrydMf6P44bXn1EE,15114
+liger_kernel/chunked_loss/fused_linear_preference.py,sha256=qeRod4MFVttj62uPFhgKAWNNjVrqiEvu5SjZfRnOGzI,15389
 liger_kernel/chunked_loss/orpo_loss.py,sha256=ZuKGjbkIYzV4UzvupNdq6vyxCp7-BztQkUt8ZnFvKos,3531
 liger_kernel/chunked_loss/simpo_loss.py,sha256=Wa4LOlDG9PbJkOOkKg8hbKvnKgg7OTBz6-qIkwPK1yw,3275
 liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -57,9 +57,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=c4OQVJmhNOloj0JYSEc0j_cQuBb
 liger_kernel/transformers/trainer/orpo_trainer.py,sha256=jko6oq_XQdBSmXubp05E-_YXOyhtB5Bj75dg5YNwOsE,7517
 liger_kernel/triton/__init__.py,sha256=yfRe0zMb47QnqjecZWG7LnanfCTzeku7SgWRAwNVmzU,101
 liger_kernel/triton/monkey_patch.py,sha256=5BcGKTtdqeYchypBIBopGIWPx1-cFALz7sOKoEsqXJ0,1584
-liger_kernel_nightly-0.5.2.dev20241211055800.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
-liger_kernel_nightly-0.5.2.dev20241211055800.dist-info/METADATA,sha256=HSfJ-qbwGmNDotIXkz8RNaQ9h8k2kA6qxAXsPvlv494,20721
-liger_kernel_nightly-0.5.2.dev20241211055800.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
-liger_kernel_nightly-0.5.2.dev20241211055800.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-liger_kernel_nightly-0.5.2.dev20241211055800.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
-liger_kernel_nightly-0.5.2.dev20241211055800.dist-info/RECORD,,
+liger_kernel_nightly-0.5.2.dev20241211213024.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
+liger_kernel_nightly-0.5.2.dev20241211213024.dist-info/METADATA,sha256=biU1_vrGRLdmGynYauvf4YfAVHrgN2RtGWe_CNuAD3c,20721
+liger_kernel_nightly-0.5.2.dev20241211213024.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
+liger_kernel_nightly-0.5.2.dev20241211213024.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+liger_kernel_nightly-0.5.2.dev20241211213024.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
+liger_kernel_nightly-0.5.2.dev20241211213024.dist-info/RECORD,,

{liger_kernel_nightly-0.5.2.dev20241211055800.dist-info → liger_kernel_nightly-0.5.2.dev20241211213024.dist-info}/LICENSE RENAMED Viewed

File without changes

{liger_kernel_nightly-0.5.2.dev20241211055800.dist-info → liger_kernel_nightly-0.5.2.dev20241211213024.dist-info}/NOTICE RENAMED Viewed

File without changes

{liger_kernel_nightly-0.5.2.dev20241211055800.dist-info → liger_kernel_nightly-0.5.2.dev20241211213024.dist-info}/WHEEL RENAMED Viewed

File without changes

{liger_kernel_nightly-0.5.2.dev20241211055800.dist-info → liger_kernel_nightly-0.5.2.dev20241211213024.dist-info}/top_level.txt RENAMED Viewed

File without changes

liger-kernel-nightly 0.5.2.dev20241211055800__py3-none-any.whl → 0.5.2.dev20241211213024__py3-none-any.whl

liger-kernel-nightly 0.5.2.dev20241211055800-py3-none-any.whl → 0.5.2.dev20241211213024-py3-none-any.whl