liger-kernel-nightly 0.5.2.dev20250122005057__py3-none-any.whl → 0.5.2.dev20250124002122__py3-none-any.whl
- liger_kernel/chunked_loss/README.md +1 -1
- liger_kernel/chunked_loss/__init__.py +1 -0
- liger_kernel/chunked_loss/functional.py +2 -0
- liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +246 -0
- liger_kernel/chunked_loss/kto_loss.py +172 -0
- {liger_kernel_nightly-0.5.2.dev20250122005057.dist-info → liger_kernel_nightly-0.5.2.dev20250124002122.dist-info}/METADATA +1 -1
- {liger_kernel_nightly-0.5.2.dev20250122005057.dist-info → liger_kernel_nightly-0.5.2.dev20250124002122.dist-info}/RECORD +11 -9
- {liger_kernel_nightly-0.5.2.dev20250122005057.dist-info → liger_kernel_nightly-0.5.2.dev20250124002122.dist-info}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.2.dev20250122005057.dist-info → liger_kernel_nightly-0.5.2.dev20250124002122.dist-info}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.2.dev20250122005057.dist-info → liger_kernel_nightly-0.5.2.dev20250124002122.dist-info}/WHEEL +0 -0
- {liger_kernel_nightly-0.5.2.dev20250122005057.dist-info → liger_kernel_nightly-0.5.2.dev20250124002122.dist-info}/top_level.txt +0 -0

liger_kernel/chunked_loss/README.md

```diff
@@ -1,6 +1,6 @@
 # Liger FlexChunkLoss: Alignment and Distillation loss
 
-Liger FlexChunkLoss offers a versatile interface, delivering up to 80% memory savings and a 10% throughput boost for post-training loss functions, including alignment (DPO, ORPO, CPO) and very soon, distillation. Its flexible design supports custom losses, ensuring efficiency gains across diverse use cases.
+Liger FlexChunkLoss offers a versatile interface, delivering up to 80% memory savings and a 10% throughput boost for post-training loss functions, including alignment (DPO, ORPO, CPO, KTO) and very soon, distillation. Its flexible design supports custom losses, ensuring efficiency gains across diverse use cases.
 
 ### User interface
 
```

liger_kernel/chunked_loss/__init__.py

```diff
@@ -1,4 +1,5 @@
 from liger_kernel.chunked_loss.cpo_loss import LigerFusedLinearCPOLoss  # noqa: F401
 from liger_kernel.chunked_loss.dpo_loss import LigerFusedLinearDPOLoss  # noqa: F401
+from liger_kernel.chunked_loss.kto_loss import LigerFusedLinearKTOLoss  # noqa: F401
 from liger_kernel.chunked_loss.orpo_loss import LigerFusedLinearORPOLoss  # noqa: F401
 from liger_kernel.chunked_loss.simpo_loss import LigerFusedLinearSimPOLoss  # noqa: F401
```

liger_kernel/chunked_loss/functional.py

```diff
@@ -1,5 +1,6 @@
 from liger_kernel.chunked_loss.cpo_loss import LigerFusedLinearCPOFunction
 from liger_kernel.chunked_loss.dpo_loss import LigerFusedLinearDPOFunction
+from liger_kernel.chunked_loss.kto_loss import LigerFusedLinearKTOFunction
 from liger_kernel.chunked_loss.orpo_loss import LigerFusedLinearORPOFunction
 from liger_kernel.chunked_loss.simpo_loss import LigerFusedLinearSimPOFunction
 
@@ -7,3 +8,4 @@ liger_fused_linear_orpo = LigerFusedLinearORPOFunction.apply
 liger_fused_linear_dpo = LigerFusedLinearDPOFunction.apply
 liger_fused_linear_cpo = LigerFusedLinearCPOFunction.apply
 liger_fused_linear_simpo = LigerFusedLinearSimPOFunction.apply
+liger_fused_linear_kto = LigerFusedLinearKTOFunction.apply
```
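
The headline change in this nightly is the KTO (Kahneman-Tversky Optimization) chunked loss: a new unpaired-preference base class plus a `LigerFusedLinearKTOLoss` module built on it, exported here through the functional alias `liger_fused_linear_kto`. The alias is simply `LigerFusedLinearKTOFunction.apply`, which the module itself calls with all arguments positional in the order of `LigerFusedLinearKTOFunction.forward`. A minimal sketch of calling it directly follows; the shapes, tensor names, and hyperparameter values are purely illustrative and not taken from the package:

```python
import torch

from liger_kernel.chunked_loss.functional import liger_fused_linear_kto

# Toy dimensions: batch, sequence length, hidden size, vocab size.
B, T, H, V = 4, 8, 32, 64
hidden = torch.randn(B, T, H, requires_grad=True)   # policy last hidden states
lm_head_weight = torch.randn(V, H)                  # policy lm_head weight
ref_hidden = torch.randn(B, T, H)                   # reference-model hidden states
ref_lm_head_weight = torch.randn(V, H)              # reference lm_head weight
target = torch.randint(0, V, (B, T))                # label token ids
preference_labels = torch.tensor([True, False, True, False])  # chosen vs rejected

# Positional order follows LigerFusedLinearKTOFunction.forward:
# (_input, weight, target, preference_labels, bias, ref_input, ref_weight,
#  ref_bias, kl, ignore_index, beta, compiled, use_ref_model)
loss = liger_fused_linear_kto(
    hidden,
    lm_head_weight,
    target,
    preference_labels,
    None,                # bias
    ref_hidden,          # ref_input
    ref_lm_head_weight,  # ref_weight
    None,                # ref_bias
    None,                # kl
    -100,                # ignore_index
    0.1,                 # beta
    False,               # compiled: skip torch.compile for this toy run
    True,                # use_ref_model
)
loss.backward()
```

For most users, the `LigerFusedLinearKTOLoss` module introduced at the end of this diff is the more convenient entry point, since it accepts keyword arguments.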

liger_kernel/chunked_loss/fused_linear_unpaired_preference.py (new file)

```diff
@@ -0,0 +1,246 @@
+from abc import abstractmethod
+from functools import partial
+
+import torch
+
+from torch.nn import functional as F
+
+
+class LigerFusedLinearUnpairedPreferenceBase(torch.autograd.Function):
+    @abstractmethod
+    def preference_loss_fn(*args, **kwargs):
+        """
+        To be extended by subclasses.
+        """
+        raise NotImplementedError("Preference loss function must be implemented.")
+
+    @staticmethod
+    def forward(
+        ctx,
+        _input,
+        weight,
+        target,
+        preference_labels,
+        bias=None,
+        loss_fn=None,
+        chunk_size=1,
+        ignore_index=-100,
+        compiled=True,
+        use_ref_model=False,
+        ref_input=None,
+        ref_weight=None,
+        ref_bias=None,
+        **loss_kwargs,
+    ):
+        """
+        Base class for fused linear layer with unpaired preference loss like KTO
+        Expects _input to be stacked with chosen and rejected inputs on the batch dimension.
+
+        The mental model is:
+
+        forward()
+        ├── Loop over chunks
+        └── compute_loss()
+            ├── chunk_forward()  # Compute logits and log probs
+            └── prefer_loss()  # Calculate preference loss
+
+        Args:
+            _input (torch.Tensor): Input tensor. Shape: (batch_size, seq_len, hidden_size).
+            weight (torch.Tensor): Weight tensor. Shape: (vocab_size, hidden_size).
+            target (torch.Tensor): Target tensor. Shape: (batch_size, seq_len).
+            bias (torch.Tensor, optional): Bias tensor. Shape: (vocab_size,).
+            loss_fn (callable): Loss function to compute the loss on a chunk of input/target.
+            chunk_size (int): Size of a chunk (# of batches of stacked chosen and rejected inputs).
+            ignore_index (int): Index to ignore for loss computation.
+            beta (float): Weight for the preference loss.
+            compiled (bool): Whether to use torch compile for chunk accumulation.
+            use_ref_model (bool): Whether to use a reference model for the alignment loss.
+            preference_labels (torch.Tensor): Boolean tensor indicating chosen (True) vs rejected (False) examples.
+                Shape: (batch_size,).
+            ref_weight (torch.Tensor): Reference weight tensor. Shape: (vocab_size, hidden_size).
+            ref_bias (torch.Tensor, optional): Reference bias tensor. Shape: (vocab_size,).
+            loss_kwargs (dict): Other possible arguments that a loss function might need
+        """
+        # TODO: Tune CHUNK_SIZE to fully utilize the GPU
+        CHUNK_SIZE = chunk_size
+
+        # Gradients to be accumulated
+        grad_inputs = []
+        grad_weight = torch.zeros_like(weight)
+        grad_bias = torch.zeros_like(bias) if bias is not None else None
+
+        # Loss to be accumulated
+        loss_acc = torch.zeros((), device=_input.device)
+
+        compute_loss = partial(
+            LigerFusedLinearUnpairedPreferenceBase._compute_loss,
+            preference_loss_fn=loss_fn,
+            full_target=target,
+            ignore_index=ignore_index,
+            use_ref_model=use_ref_model,
+            ref_weight=ref_weight,
+            ref_bias=ref_bias,
+            **loss_kwargs,
+        )
+
+        def fused_fwd_bwd(input_chunk, target_chunk, preference_labels_chunk, ref_input_chunk):
+            """
+            Fused forward and backward pass for a chunk of input and target.
+            """
+            argnums = (0, 1, 4) if bias is not None else (0, 1)
+            return torch.func.grad_and_value(compute_loss, argnums=argnums, has_aux=False)(
+                input_chunk,
+                weight,
+                target_chunk,
+                preference_labels_chunk,
+                bias,
+                ref_input_chunk=ref_input_chunk,
+            )
+
+        def accumulate_chunk(
+            input_chunk,
+            target_chunk,
+            preference_labels_chunk=None,
+            ref_input_chunk=None,
+        ):
+            (chunk_grad_input, chunk_grad_weight, *chunk_grad_bias), (chunk_loss) = fused_fwd_bwd(
+                input_chunk, target_chunk, preference_labels_chunk, ref_input_chunk
+            )
+            if bias is not None:
+                grad_bias.add_(chunk_grad_bias[0])  # accumulate bias gradient
+
+            # Accumulate gradients
+            grad_weight.add_(chunk_grad_weight)
+            grad_inputs.append(chunk_grad_input)
+
+            # Accumulate loss
+            loss_acc.add_(chunk_loss)
+
+        if compiled:
+            fused_fwd_bwd = torch.compile(fused_fwd_bwd)
+
+        # When not paired, use labels to separate chosen and rejected
+        assert preference_labels is not None, "preference_labels must be provided for unpaired preference loss"
+
+        chunks = max(1, _input.shape[0] // CHUNK_SIZE)
+        _input_chunks = torch.chunk(_input, chunks=chunks, dim=0)
+        _target_chunks = torch.chunk(target, chunks=chunks, dim=0)
+        _preference_labels_chunks = torch.chunk(preference_labels, chunks=chunks, dim=0)
+
+        if use_ref_model:
+            _ref_input_chunks = torch.chunk(ref_input, chunks=chunks, dim=0)
+
+        for (
+            input_chunk,
+            target_chunk,
+            ref_input_chunk,
+            preference_labels_chunk,
+        ) in zip(
+            _input_chunks,
+            _target_chunks,
+            (_ref_input_chunks if use_ref_model else [None] * len(_input_chunks)),
+            _preference_labels_chunks,
+        ):
+            # mark input_chunk, target_chunk, and target dimension 1 (sequence length) as dynamic to prevent torch.compile recompilation
+            torch._dynamo.mark_dynamic(input_chunk, 1)
+            torch._dynamo.mark_dynamic(target_chunk, 1)
+            torch._dynamo.mark_dynamic(target, 1)
+            torch._dynamo.mark_dynamic(ref_input_chunk, 1) if use_ref_model else None
+            torch._dynamo.mark_dynamic(preference_labels_chunk, 1)
+
+            # accumulate loss, gradients, and metrics
+            accumulate_chunk(input_chunk, target_chunk, preference_labels_chunk, ref_input_chunk)
+
+        ctx.save_for_backward(
+            torch.cat(grad_inputs, dim=0),
+            grad_weight,
+            grad_bias,
+        )
+        return loss_acc
+
+    @staticmethod
+    def backward(ctx, *grad_output):
+        grad_input, grad_weight, grad_bias = ctx.saved_tensors
+        if torch.ne(grad_output[0][0], torch.tensor(1.0, device=grad_output[0][0].device)):
+            grad_input = grad_input * grad_output[0][0]
+            grad_weight = grad_weight * grad_output[0][0]
+            grad_bias = grad_bias * grad_output[0][0] if grad_bias is not None else None
+
+        return grad_input, grad_weight, None, None, grad_bias
+
+    @staticmethod
+    def chunk_forward(
+        input_chunk,
+        weight,
+        target_chunk,
+        bias=None,
+        ignore_index=-100,
+    ):
+        logits_chunk = input_chunk @ weight.t()
+        if bias is not None:
+            logits_chunk = logits_chunk + bias
+        log_probs_chunk = F.log_softmax(logits_chunk.float(), dim=-1)
+
+        loss_mask_chunk = target_chunk != ignore_index
+        label_chunk = torch.where(loss_mask_chunk, target_chunk, 0)
+
+        per_token_logps_chunk = log_probs_chunk.gather(-1, label_chunk.unsqueeze(-1)).squeeze(-1)
+        average_log_prob_chunk = (per_token_logps_chunk * loss_mask_chunk).sum(-1) / loss_mask_chunk.sum(-1)
+
+        return average_log_prob_chunk
+
+    @staticmethod
+    def _compute_loss(
+        input_chunk,
+        weight,
+        target_chunk,
+        preference_labels_chunk,
+        bias=None,
+        preference_loss_fn=None,
+        full_target=None,
+        ignore_index=-100,
+        use_ref_model=False,
+        ref_input_chunk=None,
+        ref_weight=None,
+        ref_bias=None,
+        **loss_kwargs,
+    ):
+        """
+        Compute the total loss for a chunk of input and target, while using an alignment/preference loss function.
+        Args:
+            preference_loss_fn (callable): Loss function to compute the loss on a chunk of input/target.
+            input_chunk (torch.Tensor): Chunk of input tensor. Shape: (2 * chunk_size, sequence_length, hidden_size).
+            weight (torch.Tensor): Weight tensor. Shape: (vocab_size, hidden_size).
+            target_chunk (torch.Tensor): Chunk of target tensor. Shape: (2 * chunk_size, sequence_length).
+            bias (torch.Tensor, optional): Bias tensor. Shape: (vocab_size,).
+            full_target (torch.Tensor): Full target tensor. Shape: (batch_size, sequence_length).
+            ignore_index (int): Index to ignore for loss computation.
+            use_ref_model (bool): Whether to use a reference model for the alignment loss.
+            ref_weight (torch.Tensor): Reference weight tensor. Shape: (vocab_size, hidden_size).
+            ref_bias (torch.Tensor, optional): Reference bias tensor. Shape: (vocab_size,).
+            loss_kwargs (dict): Additional arguments for the loss function.
+        """
+        average_log_prob_chunk = LigerFusedLinearUnpairedPreferenceBase.chunk_forward(
+            input_chunk,
+            weight,
+            target_chunk,
+            bias=bias,
+            ignore_index=ignore_index,
+        )
+
+        if use_ref_model:
+            with torch.no_grad():
+                ref_average_log_prob_chunk = LigerFusedLinearUnpairedPreferenceBase.chunk_forward(
+                    ref_input_chunk,
+                    ref_weight,
+                    target_chunk,
+                    ref_bias,
+                    ignore_index=ignore_index,
+                )
+            loss_kwargs["ref_average_log_prob_chunk"] = ref_average_log_prob_chunk
+
+        preference_loss_chunk = preference_loss_fn(
+            average_log_prob_chunk, preference_labels_chunk, full_target, **loss_kwargs
+        )
+
+        return preference_loss_chunk
```
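
The base class is designed as an extension point: `preference_loss_fn` is the single abstract hook, while chunking, the fused per-chunk forward/backward via `torch.func.grad_and_value`, optional `torch.compile`, and gradient accumulation are all inherited. As a rough sketch of how a custom unpaired loss could plug in (the class name `MyUnpairedLossFunction`, the toy log-sigmoid objective, and the `beta` pass-through below are invented for illustration and are not part of the package), a subclass only has to supply the loss plus thin `forward`/`backward` wrappers:

```python
import torch
import torch.nn.functional as F

from liger_kernel.chunked_loss.fused_linear_unpaired_preference import (
    LigerFusedLinearUnpairedPreferenceBase,
)


class MyUnpairedLossFunction(LigerFusedLinearUnpairedPreferenceBase):
    @staticmethod
    def preference_loss_fn(average_log_prob_chunk, preference_labels_chunk, full_target, beta=0.1):
        # Toy objective: push chosen (True) sequence log-probs up and
        # rejected (False) ones down, normalized by the full batch size.
        sign = torch.where(preference_labels_chunk, 1.0, -1.0)
        losses = -F.logsigmoid(beta * sign * average_log_prob_chunk)
        return losses.sum() / full_target.shape[0]

    @staticmethod
    def forward(ctx, _input, weight, target, preference_labels, bias=None, beta=0.1):
        # Extra keyword arguments (here: beta) are routed to preference_loss_fn
        # through **loss_kwargs in the base class.
        return LigerFusedLinearUnpairedPreferenceBase.forward(
            ctx=ctx,
            _input=_input,
            weight=weight,
            target=target,
            preference_labels=preference_labels,
            bias=bias,
            loss_fn=MyUnpairedLossFunction.preference_loss_fn,
            beta=beta,
        )

    @staticmethod
    def backward(ctx, *grad_output):
        # The base backward yields grads for (_input, weight, target,
        # preference_labels, bias); the trailing None covers beta.
        grads = LigerFusedLinearUnpairedPreferenceBase.backward(ctx, grad_output)[:5]
        return (*grads, None)
```

The `kto_loss.py` file added next in this diff follows exactly this shape, with the KTO objective as the loss hook and a small `torch.nn.Module` wrapper on top.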

liger_kernel/chunked_loss/kto_loss.py (new file)

```diff
@@ -0,0 +1,172 @@
+import torch
+import torch.nn.functional as F
+
+from liger_kernel.chunked_loss.fused_linear_unpaired_preference import LigerFusedLinearUnpairedPreferenceBase
+
+
+class LigerFusedLinearKTOFunction(LigerFusedLinearUnpairedPreferenceBase):
+    @staticmethod
+    def preference_loss_fn(
+        average_log_prob_chunk,
+        preference_labels_chunk,
+        full_target,
+        ref_average_log_prob_chunk=None,
+        beta=0.1,
+        kl=None,
+    ):
+        """
+        Implements the Kahneman-Tversky Optimization (KTO) loss function.
+        Paper: "KTO: Model Alignment as Prospect Theory-Guided Optimization"
+        https://arxiv.org/abs/2402.01306
+
+        KTO loss is inspired by prospect theory (https://en.wikipedia.org/wiki/Prospect_theory)
+        from behavioral economics, which models how humans make decisions under uncertainty.
+        The loss function is asymmetric, treating gains and losses differently, similar to
+        human decision-making patterns.
+
+        Formula:
+        When y is chosen:
+        L_KTO = 1 - σ(β * (log[π(x)/π₀(x)] - KL(π||π₀)_y))
+        When y is rejected:
+        L_KTO = 1 - σ(β * (KL(π||π₀)_y - log[π(x)/π₀(x)]))
+
+        Where:
+        - σ: Sigmoid function
+        - β: Temperature parameter controlling the strength of the preference signal
+        - π(x): Policy (current model)
+        - π₀(x): Reference policy (reference model)
+        - KL(π||π₀)_y: KL divergence estimated using the rejected response y
+
+        The loss encourages the model to:
+        1. Assign higher probability to chosen responses
+        2. Assign lower probability to rejected responses
+        3. Maintain reasonable distance from the reference model
+
+        Args:
+            chosen_logps: Log probabilities of chosen tokens (batch_size,)
+            rejected_logps: Log probabilities of rejected tokens (batch_size,)
+            full_target: Non chunked full target tensor
+            ref_chosen_logps: Reference log probs of chosen tokens (batch_size,)
+            ref_rejected_logps: Reference log probs of rejected tokens (batch_size,)
+            beta: Weight for the direct preference loss
+            kl: KL divergence between the policy model and the reference model for the chosen responses. Shape: (batch_size,)
+        Returns:
+            Tuple of (loss, chosen_rewards, rejected_rewards):
+            - loss: The KTO loss value
+            - chosen_rewards: Reward signals for chosen responses (detached)
+            - rejected_rewards: Reward signals for rejected responses (detached)
+        """
+        logratios_chunk = average_log_prob_chunk - ref_average_log_prob_chunk
+        multiplier_chunk = torch.where(preference_labels_chunk, 1, -1)
+        if kl is not None:
+            losses = 1 - F.sigmoid(beta * (logratios_chunk - kl) * multiplier_chunk)
+        else:
+            losses = 1 - F.sigmoid(beta * logratios_chunk * multiplier_chunk)
+
+        return losses.sum() / (full_target.shape[0])
+
+    @staticmethod
+    def forward(
+        ctx,
+        _input,
+        weight,
+        target,
+        preference_labels,
+        bias=None,
+        ref_input=None,
+        ref_weight=None,
+        ref_bias=None,
+        kl=None,
+        ignore_index=-100,
+        beta=0.1,
+        compiled=True,
+        use_ref_model=True,
+    ):
+        return LigerFusedLinearUnpairedPreferenceBase.forward(
+            ctx=ctx,
+            _input=_input,
+            weight=weight,
+            target=target,
+            preference_labels=preference_labels,
+            bias=bias,
+            loss_fn=LigerFusedLinearKTOFunction.preference_loss_fn,
+            ignore_index=ignore_index,
+            beta=beta,
+            compiled=compiled,
+            use_ref_model=use_ref_model,
+            ref_input=ref_input,
+            ref_weight=ref_weight,
+            ref_bias=ref_bias,
+            kl=kl,
+        )
+
+    @staticmethod
+    def backward(ctx, *grad_output):
+        grads = LigerFusedLinearUnpairedPreferenceBase.backward(ctx, grad_output)[:5]
+        return (
+            *grads,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+        )
+
+
+class LigerFusedLinearKTOLoss(torch.nn.Module):
+    """
+    Fused linear layer with Kahneman-Tversky Optimization (KTO) loss.
+    """
+
+    def __init__(
+        self,
+        ignore_index: int = -100,
+        beta: float = 0.1,
+        compiled: bool = True,
+        use_ref_model: bool = False,
+    ):
+        """
+        Args:
+            ignore_index (int): Index to ignore in the loss calculation
+            beta (float): Temperature parameter for the KTO loss
+            compiled (bool): Whether to use compiled operations
+            use_ref_model (bool): Whether to use a reference model for the DPO loss.
+        """
+        super().__init__()
+        self.ignore_index = ignore_index
+        self.beta = beta
+        self.compiled = compiled
+        self.use_ref_model = use_ref_model
+
+    def forward(
+        self,
+        _input,
+        lin_weight,
+        target,
+        bias=None,
+        preference_labels=None,
+        ref_input=None,
+        ref_weight=None,
+        ref_bias=None,
+        kl=None,
+    ):
+        return LigerFusedLinearKTOFunction.apply(
+            _input,
+            lin_weight,
+            target,
+            preference_labels,
+            bias,
+            ref_input,
+            ref_weight,
+            ref_bias,
+            kl,
+            self.ignore_index,
+            self.beta,
+            self.compiled,
+            self.use_ref_model,
+        )
```
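
Putting it together, the new module can be driven roughly as in the sketch below; shapes, tensor names, and hyperparameters are illustrative only. Note that `LigerFusedLinearKTOLoss` defaults to `use_ref_model=False`, while `preference_loss_fn` above subtracts `ref_average_log_prob_chunk`, so this sketch enables the reference-model path and supplies reference hidden states and weights.

```python
import torch

from liger_kernel.chunked_loss import LigerFusedLinearKTOLoss

# Toy dimensions: batch, sequence length, hidden size, vocab size.
B, T, H, V = 4, 8, 32, 64
hidden = torch.randn(B, T, H, requires_grad=True)       # policy last hidden states
lm_head_weight = torch.randn(V, H, requires_grad=True)  # policy lm_head weight
ref_hidden = torch.randn(B, T, H)                        # reference-model hidden states
ref_lm_head_weight = torch.randn(V, H)                   # reference lm_head weight
target = torch.randint(0, V, (B, T))                     # label token ids
preference_labels = torch.tensor([True, False, True, False])  # chosen vs rejected

kto_loss = LigerFusedLinearKTOLoss(
    beta=0.1,
    use_ref_model=True,   # KTO compares the policy against a reference policy
    compiled=False,       # skip torch.compile for this toy run
)
loss = kto_loss(
    hidden,
    lm_head_weight,
    target,
    preference_labels=preference_labels,
    ref_input=ref_hidden,
    ref_weight=ref_lm_head_weight,
)
loss.backward()
print(loss.item(), hidden.grad.shape)
```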

{liger_kernel_nightly-0.5.2.dev20250122005057.dist-info → liger_kernel_nightly-0.5.2.dev20250124002122.dist-info}/RECORD

```diff
@@ -1,13 +1,15 @@
 liger_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 liger_kernel/env_report.py,sha256=uhdEC8OydxoZlb7B6YYcAaBF3crGFdIck-4cxaW4NJY,1728
 liger_kernel/utils.py,sha256=HJa-xVKOohDn6pLVIx-Fv0V9h0QAL3qZGQNRICI-OpI,249
-liger_kernel/chunked_loss/README.md,sha256=
-liger_kernel/chunked_loss/__init__.py,sha256=
+liger_kernel/chunked_loss/README.md,sha256=0FmkFC3hKBqyoDT5uTlIYmrvRkF-EOCR1y-EBU1LpWU,2248
+liger_kernel/chunked_loss/__init__.py,sha256=CI6hBI7VldTX748c7F6F8YpHTn1q4gv5-lMXf273oXQ,431
 liger_kernel/chunked_loss/cpo_loss.py,sha256=OdBR8WYdHTKpLI_c9DcuwqKSWPeAAeTyREz46Vu_cAY,3682
 liger_kernel/chunked_loss/dpo_loss.py,sha256=VYZMOafdvE8xlhvTtwjrz81tIzxR1mHF4lXdsADnIQg,4373
-liger_kernel/chunked_loss/functional.py,sha256=
+liger_kernel/chunked_loss/functional.py,sha256=dO0DYMPTBxwPtEUQ1DUV2zCmZ6i-k3B7COeR3-IwA6M,683
 liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=uQtwtu-kaUZJTjNhAnIr3O794oUlUZ98XR5shYtwP5k,10440
 liger_kernel/chunked_loss/fused_linear_preference.py,sha256=idK9V9NivoVITqVpiG0fEGUHSvinYWkn9-EYXZjR-KQ,18356
+liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=ZqYlXXhIphkJPxOS7iI70avgrr6x0skEtgpckZTYau0,9819
+liger_kernel/chunked_loss/kto_loss.py,sha256=eVNW6HVCAm32shpfhbRlk92Flnjd7G32v0gK9DUUSOQ,5655
 liger_kernel/chunked_loss/orpo_loss.py,sha256=yjcrrbVeemLYodoSKT-FMSnaPtyKAZ3aOrvPD6tTY6Y,3617
 liger_kernel/chunked_loss/simpo_loss.py,sha256=3TTc7U79Orjgi-Wu81WZkWk5MgsdqKXIOBHgIvDazPw,3865
 liger_kernel/ops/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -58,9 +60,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
 liger_kernel/transformers/trainer/orpo_trainer.py,sha256=pdekW7l6Qg_aqa5SYKYlSWUF8m3lkOFvFLcIMEHrz9s,8338
 liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
 liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
-liger_kernel_nightly-0.5.2.
-liger_kernel_nightly-0.5.2.
-liger_kernel_nightly-0.5.2.
-liger_kernel_nightly-0.5.2.
-liger_kernel_nightly-0.5.2.
-liger_kernel_nightly-0.5.2.
+liger_kernel_nightly-0.5.2.dev20250124002122.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
+liger_kernel_nightly-0.5.2.dev20250124002122.dist-info/METADATA,sha256=XkhmLkKGR1Tuel5f-4SxOwiE2AP0jrWAmkN8jrQcB_U,21140
+liger_kernel_nightly-0.5.2.dev20250124002122.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
+liger_kernel_nightly-0.5.2.dev20250124002122.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+liger_kernel_nightly-0.5.2.dev20250124002122.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
+liger_kernel_nightly-0.5.2.dev20250124002122.dist-info/RECORD,,
```