liger-kernel-nightly 0.5.2.dev20241212060959__py3-none-any.whl → 0.5.2.dev20241216214323__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -125,6 +125,10 @@ class LigerORPOTrainer(ORPOTrainer):
125
125
  outputs.last_hidden_state,
126
126
  concatenated_batch["concatenated_labels"],
127
127
  )
128
+ # if aux_loss_enabled, add the aux_loss to the orpo_loss
129
+ if self.aux_loss_enabled:
130
+ orpo_loss += self.aux_loss_coef * outputs.aux_loss
131
+
128
132
  return orpo_loss, aux_outputs
129
133
 
130
134
  def get_batch_loss_metrics(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.2.dev20241212060959
3
+ Version: 0.5.2.dev20241216214323
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -136,7 +136,7 @@ With one line of code, Liger Kernel can increase throughput by more than 20% and
136
136
  > - Benchmark conditions: LLaMA 3-8B, Batch Size = 8, Data Type = `bf16`, Optimizer = AdamW, Gradient Checkpointing = True, Distributed Strategy = FSDP1 on 8 A100s.
137
137
  > - Hugging Face models start to OOM at a 4K context length, whereas Hugging Face + Liger Kernel scales up to 16K.
138
138
 
139
- ## Optimize post training with Liger Kernel
139
+ ## Optimize Post Training with Liger Kernel
140
140
 
141
141
  <p align="center">
142
142
  <img src="https://raw.githubusercontent.com/linkedin/Liger-Kernel/main/docs/images/post-training.png" width="50%" alt="Post Training">
@@ -55,12 +55,12 @@ liger_kernel/transformers/model/phi3.py,sha256=xUZPlaPKwknLjHc3uUW3EPodm1h0vD3G7
55
55
  liger_kernel/transformers/model/qwen2.py,sha256=EyhSSzQOskGjSnCsKMZpd1s5IAIlHd5PBO3q0MoCs00,9619
56
56
  liger_kernel/transformers/model/qwen2_vl.py,sha256=bIQe2bWiY--G84FhCD29Gdi64_qHP6vbcGsK6vKysQE,8547
57
57
  liger_kernel/transformers/trainer/__init__.py,sha256=c4OQVJmhNOloj0JYSEc0j_cQuBbzGWILfaowUR1hmRw,210
58
- liger_kernel/transformers/trainer/orpo_trainer.py,sha256=jko6oq_XQdBSmXubp05E-_YXOyhtB5Bj75dg5YNwOsE,7517
58
+ liger_kernel/transformers/trainer/orpo_trainer.py,sha256=GCwwYjZbnu-X5TYKSv4hz4EPkZtH2o45X1xHv4p-Pik,7680
59
59
  liger_kernel/triton/__init__.py,sha256=yfRe0zMb47QnqjecZWG7LnanfCTzeku7SgWRAwNVmzU,101
60
60
  liger_kernel/triton/monkey_patch.py,sha256=5BcGKTtdqeYchypBIBopGIWPx1-cFALz7sOKoEsqXJ0,1584
61
- liger_kernel_nightly-0.5.2.dev20241212060959.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
62
- liger_kernel_nightly-0.5.2.dev20241212060959.dist-info/METADATA,sha256=fzcvxLT4DiKb7eqnHAw1NZvVvymMYXlaUnMUo1kLqmM,21055
63
- liger_kernel_nightly-0.5.2.dev20241212060959.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
64
- liger_kernel_nightly-0.5.2.dev20241212060959.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
65
- liger_kernel_nightly-0.5.2.dev20241212060959.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
66
- liger_kernel_nightly-0.5.2.dev20241212060959.dist-info/RECORD,,
61
+ liger_kernel_nightly-0.5.2.dev20241216214323.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
62
+ liger_kernel_nightly-0.5.2.dev20241216214323.dist-info/METADATA,sha256=ybbjoZ_TBOi601YdSxUk8hi6A9LLKq8SOUlE94VKkvs,21055
63
+ liger_kernel_nightly-0.5.2.dev20241216214323.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
64
+ liger_kernel_nightly-0.5.2.dev20241216214323.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
65
+ liger_kernel_nightly-0.5.2.dev20241216214323.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
66
+ liger_kernel_nightly-0.5.2.dev20241216214323.dist-info/RECORD,,