npm - cciwon-code-review-cli - Versions diffs - 2.0.1 → 2.0.3 - Mend

cciwon-code-review-cli 2.0.1 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111) hide show

package/unsloth_compiled_cache/UnslothPRMTrainer.py ADDED Viewed

@@ -0,0 +1,979 @@
+"""
+2025.12.6
+2025.12.7
+4.57.1
+0.24.0
+__UNSLOTH_VERSIONING__
+"""
+# Unsloth auto generated code
+# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+from torch import Tensor
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable
+from trl.trainer.prm_trainer import (BaseImageProcessor, BaseTrainer, Callable, DataCollator, DataCollatorForTokenClassification, Dataset, EvalPrediction, FeatureExtractionMixin, Optional, PRMConfig, PRMTrainer, PartialState, Path, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, TrainerCallback, Union, chain, compute_accuracy, disable_dropout_in_model, features, nn, os, prepare_peft_model, textwrap, torch, warnings, Optional, PreTrainedModel, os, torch)
+import os
+from typing import *
+from dataclasses import dataclass, field
+from packaging.version import Version
+import torch
+import numpy as np
+from contextlib import nullcontext
+from torch.nn import functional as F
+import inspect
+from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling
+from transformers.training_args import ParallelMode
+# Wrap trainer with padding to right and enable training mode
+# Also patches W&B since multiple runs must use wandb.finish()
+import functools
+from types import MethodType
+def prepare_for_training_mode(f):
+    @functools.wraps(f)
+    def wrapper(self, *args, **kwargs):
+        # Enable training mode
+        if hasattr(self, 'model') and hasattr(self.model, "for_training"):
+            self.model.for_training()
+        output = f(self, *args, **kwargs)
+        # Return inference mode
+        if hasattr(self, 'model') and hasattr(self.model, "for_inference"):
+            self.model.for_inference()
+        # Patch W&B to enable logging on future runs, otherwise it'll overwrite the first run
+        try:
+            import wandb
+            wandb.finish()
+        except:
+            pass
+        return output
+    return wrapper
+pass
+torch_compile_options = {
+    "epilogue_fusion"   : True,
+    "max_autotune"      : False,
+    "shape_padding"     : True,
+    "trace.enabled"     : False,
+    "triton.cudagraphs" : False,
+}
+@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,)
+def chunked_selective_log_softmax(logits, index):
+    # Split into 4 chunks only
+    chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0)
+    chunked_index  = torch.chunk(index.reshape(-1), chunks = 4, dim = 0)
+    all_per_token_logps = []
+    # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index)
+    for chunk_logits, chunk_index in zip(chunked_logits, chunked_index):
+        chunk_logits = chunk_logits.to(torch.float32)
+        selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1)
+        logsumexp_values = torch.logsumexp(chunk_logits, dim = -1)
+        per_token_logps = selected_logits - logsumexp_values
+        all_per_token_logps.append(per_token_logps)
+    pass
+    all_per_token_logps = torch.concat(all_per_token_logps)
+    all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1]))
+    return all_per_token_logps
+def calculate_pad_tokens_in_prompt(
+    input_ids: torch.Tensor,
+    logits_to_keep: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given prompt tensor, it returns all the left padded tokens in that sequence. so [pad, pad, pad, cat] = 3 tokens
+    """
+    if logits_to_keep >= input_ids.shape[1]:
+        raise ValueError("logits_to_keep must be smaller than the sequence length.")
+    prompt_section = input_ids[:, :-logits_to_keep]
+    padding_mask = (prompt_section == pad_token_id)
+    pad_token_counts = padding_mask.sum(dim=1)
+    return pad_token_counts
+def create_completion_attention_mask(
+    completion_input_ids: torch.Tensor,
+    left_pad_tokens_per_prompt: torch.Tensor,
+    max_left_pad: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad]
+    Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens
+    and pad are pad tokens, this function would make a completion mask that would 0 out the pad
+    and p tokens. so in this example [0,0,0,1,1,1,0,0,0]
+    """
+    batch_size, completion_len = completion_input_ids.shape
+    device = completion_input_ids.device
+    num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt
+    indices = torch.arange(completion_len, device=device).unsqueeze(0)
+    shift_mask = indices >= num_tokens_to_mask.unsqueeze(1)
+    non_padding_mask = (completion_input_ids != pad_token_id)
+    final_mask = shift_mask & non_padding_mask
+    return final_mask
+def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
+    """
+    Moves all padding tokens in each sequence of a batch to the right.
+    """
+    mask = (tensor != pad_id)
+    # Must do stable=True since binary mark is unordered
+    sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True)
+    packed_tensor = torch.gather(tensor, 1, sorted_indices)
+    return packed_tensor
+def align_logprobs_with_mask(
+    logprob_tensor: torch.Tensor,
+    attention_mask: torch.Tensor,
+    pad_value: float = 0.0
+) -> torch.Tensor:
+    """
+    Aligns a log probability tensor with a given attention mask.
+    """
+    device = logprob_tensor.device
+    batch_size, logprob_seq_len = logprob_tensor.shape
+    mask_seq_len = attention_mask.shape[1]
+    padded_logprobs = torch.full(
+        attention_mask.shape,
+        fill_value=pad_value,
+        dtype=logprob_tensor.dtype,
+        device=device
+    )
+    left_pad_counts = torch.argmax(attention_mask, dim=1)
+    cols = torch.arange(logprob_seq_len, device=device)
+    dest_indices = left_pad_counts.unsqueeze(1) + cols
+    # Create destination row indices
+    # Shape: [batch_size, logprob_seq_len]
+    row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices)
+    # --- 4. Filter out-of-bounds indices and perform assignment ---
+    # Create a mask to identify only the indices that are within the bounds
+    # of the target tensor's sequence length.
+    valid_mask = dest_indices < mask_seq_len
+    # Use this mask to select only the valid row indices, column indices,
+    # and the corresponding values from the logprob tensor.
+    # This flattens the selected elements into 1D tensors.
+    valid_rows = row_indices[valid_mask]
+    valid_cols = dest_indices[valid_mask]
+    valid_vals = logprob_tensor[valid_mask]
+    # Place the valid values into their correct positions in the padded tensor
+    # using a single, efficient advanced indexing operation.
+    padded_logprobs[valid_rows, valid_cols] = valid_vals
+    return padded_logprobs
+@dataclass
+class UnslothPRMConfig(PRMConfig):
+    """
+    Configuration class for the [`PRMTrainer`].
+    This class includes only the parameters that are specific to PRM training. For a full list of training arguments,
+    please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may
+    differ from those in [`~transformers.TrainingArguments`].
+    Using [`~transformers.HfArgumentParser`] we can turn this class into
+    [argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
+    command line.
+    Parameters:
+        max_length (`int` or `None`, *optional*, defaults to `1024`):
+            Maximum length of the sequences (prompt + completion) used for truncation.
+        max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
+            Maximum length of the prompt used for truncation.
+        max_completion_length (`int`, *optional*):
+            Maximum length of the completion used for truncation. The completion is the concatenation of the steps.
+        disable_dropout (`bool`, *optional*, defaults to `True`):
+            Whether to disable dropout in the model.
+        step_separator (`str`, *optional*, defaults to `"\n"`):
+            Separator used to separate each step of the reasoning process.
+        train_on_last_step_only (`bool`, *optional*, defaults to `False`):
+            Whether to train only on the last step.
+        dataset_num_proc (`int`, *optional*):
+            Number of processes to use for processing the dataset.
+    """
+    vllm_sampling_params: Optional[Any] = field(
+        default = None,
+        metadata = {'help': 'vLLM SamplingParams'},
+    )
+    unsloth_num_chunks : Optional[int] = field(
+        default = -1,
+        metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
+    )
+    max_seq_length : Optional[int] = field(
+        default = None,
+        metadata = {'help': 'Maximum sequence length to truncate to.'},
+    )
+    def __init__(
+        self,
+        output_dir = None,
+        overwrite_output_dir = None,
+        do_train = False,
+        do_eval = False,
+        do_predict = False,
+        eval_strategy = 'no',
+        prediction_loss_only = False,
+        per_device_train_batch_size = 4,
+        per_device_eval_batch_size = 4,
+        per_gpu_train_batch_size = None,
+        per_gpu_eval_batch_size = None,
+        gradient_accumulation_steps = 2,
+        eval_accumulation_steps = 2,
+        eval_delay = 0,
+        torch_empty_cache_steps = 250,
+        learning_rate = 5e-05,
+        weight_decay = 0.01,
+        adam_beta1 = 0.9,
+        adam_beta2 = 0.999,
+        adam_epsilon = 1e-08,
+        max_grad_norm = 1.0,
+        num_train_epochs = 3.0,
+        max_steps = -1,
+        lr_scheduler_type = 'linear',
+        warmup_ratio = 0.1,
+        warmup_steps = 0,
+        log_level = 'passive',
+        log_level_replica = 'warning',
+        log_on_each_node = True,
+        logging_dir = None,
+        logging_strategy = 'steps',
+        logging_first_step = False,
+        logging_steps = 1,
+        logging_nan_inf_filter = False,
+        save_strategy = 'steps',
+        save_steps = 500,
+        save_total_limit = None,
+        save_safetensors = True,
+        save_on_each_node = False,
+        save_only_model = False,
+        restore_callback_states_from_checkpoint = False,
+        no_cuda = False,
+        use_cpu = False,
+        use_mps_device = False,
+        seed = 3407,
+        data_seed = 3407,
+        jit_mode_eval = False,
+        bf16 = False,
+        fp16 = False,
+        fp16_opt_level = 'O1',
+        half_precision_backend = 'auto',
+        bf16_full_eval = False,
+        fp16_full_eval = False,
+        tf32 = None,
+        local_rank = -1,
+        ddp_backend = None,
+        tpu_num_cores = None,
+        tpu_metrics_debug = False,
+        debug = '',
+        dataloader_drop_last = False,
+        eval_steps = None,
+        dataloader_num_workers = 0,
+        dataloader_prefetch_factor = None,
+        past_index = -1,
+        run_name = None,
+        disable_tqdm = None,
+        remove_unused_columns = True,
+        label_names = None,
+        load_best_model_at_end = False,
+        metric_for_best_model = None,
+        greater_is_better = None,
+        ignore_data_skip = False,
+        fsdp = None,
+        fsdp_min_num_params = 0,
+        fsdp_config = None,
+        fsdp_transformer_layer_cls_to_wrap = None,
+        accelerator_config = None,
+        parallelism_config = None,
+        deepspeed = None,
+        label_smoothing_factor = 0.0,
+        optim = 'adamw_8bit',
+        optim_args = None,
+        adafactor = False,
+        group_by_length = False,
+        length_column_name = 'length',
+        report_to = 'none',
+        project = 'huggingface',
+        trackio_space_id = 'trackio',
+        ddp_find_unused_parameters = None,
+        ddp_bucket_cap_mb = None,
+        ddp_broadcast_buffers = None,
+        dataloader_pin_memory = True,
+        dataloader_persistent_workers = False,
+        skip_memory_metrics = True,
+        use_legacy_prediction_loop = False,
+        push_to_hub = False,
+        resume_from_checkpoint = None,
+        hub_model_id = None,
+        hub_strategy = 'every_save',
+        hub_token = None,
+        hub_private_repo = None,
+        hub_always_push = False,
+        hub_revision = None,
+        gradient_checkpointing = True,
+        gradient_checkpointing_kwargs = None,
+        include_inputs_for_metrics = False,
+        eval_do_concat_batches = True,
+        fp16_backend = 'auto',
+        push_to_hub_model_id = None,
+        push_to_hub_organization = None,
+        push_to_hub_token = None,
+        mp_parameters = '',
+        auto_find_batch_size = False,
+        full_determinism = False,
+        torchdynamo = None,
+        ray_scope = 'last',
+        ddp_timeout = 1800,
+        torch_compile = False,
+        torch_compile_backend = None,
+        torch_compile_mode = None,
+        include_tokens_per_second = False,
+        include_num_input_tokens_seen = False,
+        neftune_noise_alpha = None,
+        optim_target_modules = None,
+        batch_eval_metrics = False,
+        eval_on_start = False,
+        use_liger_kernel = False,
+        liger_kernel_config = None,
+        eval_use_gather_object = False,
+        average_tokens_across_devices = True,
+        max_length = 1024,
+        max_prompt_length = 512,
+        max_completion_length = None,
+        disable_dropout = True,
+        step_separator = '\
+',
+        train_on_last_step_only = False,
+        dataset_num_proc = None,
+        vllm_sampling_params = None,
+        unsloth_num_chunks = -1,
+        max_seq_length = None,
+        **kwargs,
+    ):
+        if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
+        if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too larger > 1! Consider decreasing it to 1e-1, otherwise gradient updates will explode!')
+        if output_dir is None and save_strategy == 'steps' and save_steps == 500:
+            output_dir = 'unsloth_training_checkpoints'
+            save_strategy = 'no'
+        if dataset_num_proc is None:
+            from multiprocessing import cpu_count
+            dataset_num_proc = min(max(cpu_count()+4, 2), 64)
+        super().__init__(
+            output_dir = output_dir,
+            overwrite_output_dir = overwrite_output_dir,
+            do_train = do_train,
+            do_eval = do_eval,
+            do_predict = do_predict,
+            eval_strategy = eval_strategy,
+            prediction_loss_only = prediction_loss_only,
+            per_device_train_batch_size = per_device_train_batch_size,
+            per_device_eval_batch_size = per_device_eval_batch_size,
+            per_gpu_train_batch_size = per_gpu_train_batch_size,
+            per_gpu_eval_batch_size = per_gpu_eval_batch_size,
+            gradient_accumulation_steps = gradient_accumulation_steps,
+            eval_accumulation_steps = eval_accumulation_steps,
+            eval_delay = eval_delay,
+            torch_empty_cache_steps = torch_empty_cache_steps,
+            learning_rate = learning_rate,
+            weight_decay = weight_decay,
+            adam_beta1 = adam_beta1,
+            adam_beta2 = adam_beta2,
+            adam_epsilon = adam_epsilon,
+            max_grad_norm = max_grad_norm,
+            num_train_epochs = num_train_epochs,
+            max_steps = max_steps,
+            lr_scheduler_type = lr_scheduler_type,
+            warmup_ratio = warmup_ratio,
+            warmup_steps = warmup_steps,
+            log_level = log_level,
+            log_level_replica = log_level_replica,
+            log_on_each_node = log_on_each_node,
+            logging_dir = logging_dir,
+            logging_strategy = logging_strategy,
+            logging_first_step = logging_first_step,
+            logging_steps = logging_steps,
+            logging_nan_inf_filter = logging_nan_inf_filter,
+            save_strategy = save_strategy,
+            save_steps = save_steps,
+            save_total_limit = save_total_limit,
+            save_safetensors = save_safetensors,
+            save_on_each_node = save_on_each_node,
+            save_only_model = save_only_model,
+            restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint,
+            no_cuda = no_cuda,
+            use_cpu = use_cpu,
+            use_mps_device = use_mps_device,
+            seed = seed,
+            data_seed = data_seed,
+            jit_mode_eval = jit_mode_eval,
+            bf16 = bf16,
+            fp16 = fp16,
+            fp16_opt_level = fp16_opt_level,
+            half_precision_backend = half_precision_backend,
+            bf16_full_eval = bf16_full_eval,
+            fp16_full_eval = fp16_full_eval,
+            tf32 = tf32,
+            local_rank = local_rank,
+            ddp_backend = ddp_backend,
+            tpu_num_cores = tpu_num_cores,
+            tpu_metrics_debug = tpu_metrics_debug,
+            debug = debug,
+            dataloader_drop_last = dataloader_drop_last,
+            eval_steps = eval_steps,
+            dataloader_num_workers = dataloader_num_workers,
+            dataloader_prefetch_factor = dataloader_prefetch_factor,
+            past_index = past_index,
+            run_name = run_name,
+            disable_tqdm = disable_tqdm,
+            remove_unused_columns = remove_unused_columns,
+            label_names = label_names,
+            load_best_model_at_end = load_best_model_at_end,
+            metric_for_best_model = metric_for_best_model,
+            greater_is_better = greater_is_better,
+            ignore_data_skip = ignore_data_skip,
+            fsdp = fsdp,
+            fsdp_min_num_params = fsdp_min_num_params,
+            fsdp_config = fsdp_config,
+            fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap,
+            accelerator_config = accelerator_config,
+            parallelism_config = parallelism_config,
+            deepspeed = deepspeed,
+            label_smoothing_factor = label_smoothing_factor,
+            optim = optim,
+            optim_args = optim_args,
+            adafactor = adafactor,
+            group_by_length = group_by_length,
+            length_column_name = length_column_name,
+            report_to = report_to,
+            project = project,
+            trackio_space_id = trackio_space_id,
+            ddp_find_unused_parameters = ddp_find_unused_parameters,
+            ddp_bucket_cap_mb = ddp_bucket_cap_mb,
+            ddp_broadcast_buffers = ddp_broadcast_buffers,
+            dataloader_pin_memory = dataloader_pin_memory,
+            dataloader_persistent_workers = dataloader_persistent_workers,
+            skip_memory_metrics = skip_memory_metrics,
+            use_legacy_prediction_loop = use_legacy_prediction_loop,
+            push_to_hub = push_to_hub,
+            resume_from_checkpoint = resume_from_checkpoint,
+            hub_model_id = hub_model_id,
+            hub_strategy = hub_strategy,
+            hub_token = hub_token,
+            hub_private_repo = hub_private_repo,
+            hub_always_push = hub_always_push,
+            hub_revision = hub_revision,
+            gradient_checkpointing = gradient_checkpointing,
+            gradient_checkpointing_kwargs = gradient_checkpointing_kwargs,
+            include_inputs_for_metrics = include_inputs_for_metrics,
+            eval_do_concat_batches = eval_do_concat_batches,
+            fp16_backend = fp16_backend,
+            push_to_hub_model_id = push_to_hub_model_id,
+            push_to_hub_organization = push_to_hub_organization,
+            push_to_hub_token = push_to_hub_token,
+            mp_parameters = mp_parameters,
+            auto_find_batch_size = auto_find_batch_size,
+            full_determinism = full_determinism,
+            torchdynamo = torchdynamo,
+            ray_scope = ray_scope,
+            ddp_timeout = ddp_timeout,
+            torch_compile = torch_compile,
+            torch_compile_backend = torch_compile_backend,
+            torch_compile_mode = torch_compile_mode,
+            include_tokens_per_second = include_tokens_per_second,
+            include_num_input_tokens_seen = include_num_input_tokens_seen,
+            neftune_noise_alpha = neftune_noise_alpha,
+            optim_target_modules = optim_target_modules,
+            batch_eval_metrics = batch_eval_metrics,
+            eval_on_start = eval_on_start,
+            use_liger_kernel = use_liger_kernel,
+            liger_kernel_config = liger_kernel_config,
+            eval_use_gather_object = eval_use_gather_object,
+            average_tokens_across_devices = average_tokens_across_devices,
+            max_length = max_length,
+            max_prompt_length = max_prompt_length,
+            max_completion_length = max_completion_length,
+            disable_dropout = disable_dropout,
+            step_separator = step_separator,
+            train_on_last_step_only = train_on_last_step_only,
+            dataset_num_proc = dataset_num_proc,**kwargs)
+        self.vllm_sampling_params = vllm_sampling_params
+        self.unsloth_num_chunks = unsloth_num_chunks
+        self.max_seq_length = max_seq_length
+pass
+class _UnslothPRMTrainer(BaseTrainer):
+    """"""
+    _tag_names = ["trl", "prm"]
+    _name = "PRM"
+    _paper = {
+        "title": "Solving math word problems with process-and outcome-based feedback",
+        "id": "2211.14275",
+        # docstyle-ignore
+        "citation": textwrap.dedent("""\
+            @article{uesato2022solving,
+                title        = {{Solving Math Word Problems With Process- and Outcome-Based Feedback}},
+                author       = {Uesato, Jonathan and Kushman, Nate and Kumar, Ramana and Song, Francis and Siegel, Noah and Wang, Lisa and Creswell, Antonia and Irving, Geoffrey and Higgins, Irina},
+                year         = 2022,
+                journal      = {arXiv preprint arXiv:2211.14275}
+            }"""),
+    }
+    def __init__(
+        self,
+        model: Optional[Union[PreTrainedModel, nn.Module]] = None,
+        args: Optional[PRMConfig] = None,
+        data_collator: Optional[DataCollator] = None,
+        train_dataset: Optional[Dataset] = None,
+        eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None,
+        processing_class: Optional[
+            Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin]
+        ] = None,
+        model_init: Optional[Callable[[], PreTrainedModel]] = None,
+        compute_metrics: Optional[Callable[[EvalPrediction], dict]] = None,
+        callbacks: Optional[list[TrainerCallback]] = None,
+        optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (
+            None,
+            None,
+        ),
+        preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
+        peft_config: Optional[dict] = None,
+    ):
+        if not os.environ.get("TRL_EXPERIMENTAL_SILENCE"):
+            warnings.warn(
+                "This trainer will soon be moved to trl.experimental and is a candidate for removal. If you rely on "
+                "it and want it to remain, please share your comments here: "
+                "https://github.com/huggingface/trl/issues/4223. Silence this warning by setting environment variable "
+                "TRL_EXPERIMENTAL_SILENCE=1."
+            )
+        if False:
+            model = prepare_peft_model(model, peft_config, args)
+        # Disable dropout in the model
+        if args.disable_dropout:
+            disable_dropout_in_model(model)
+        if compute_metrics is None:
+            compute_metrics = compute_accuracy
+        if data_collator is None:
+            if processing_class is None:
+                raise ValueError(
+                    "A processing_class must be specified when using the default DataCollatorForTokenClassification"
+                )
+            data_collator = DataCollatorForTokenClassification(processing_class, max_length=args.max_length)
+        if "input_ids" not in train_dataset.column_names:
+            with PartialState().main_process_first():
+                fn_kwargs = {
+                    "tokenizer": processing_class,
+                    "step_separator": args.step_separator,
+                    "max_length": args.max_length,
+                    "max_prompt_length": args.max_prompt_length,
+                    "max_completion_length": args.max_completion_length,
+                    "train_on_last_step_only": args.train_on_last_step_only,
+                }
+                train_fn_kwargs = {**fn_kwargs, "is_eval": False}
+                train_dataset = train_dataset.map(
+                    self.tokenize_row,
+                    fn_kwargs=train_fn_kwargs,
+                    num_proc=args.dataset_num_proc,
+                    remove_columns=train_dataset.features,
+                    desc="Tokenizing train dataset",
+                    features=features.Features(  # needed to avoid map to cast labels to bool
+                        {
+                            "labels": features.Sequence(features.Value("int64")),
+                            "input_ids": features.Sequence(features.Value("int64")),
+                        }
+                    ),
+                )
+                eval_fn_kwargs = {**fn_kwargs, "is_eval": True}
+                if eval_dataset is not None:
+                    eval_dataset = eval_dataset.map(
+                        self.tokenize_row,
+                        fn_kwargs=eval_fn_kwargs,
+                        num_proc=args.dataset_num_proc,
+                        remove_columns=eval_dataset.features,
+                        desc="Tokenizing eval dataset",
+                        features=features.Features(  # needed to avoid map to cast labels to bool
+                            {
+                                "labels": features.Sequence(features.Value("int64")),
+                                "input_ids": features.Sequence(features.Value("int64")),
+                            }
+                        ),
+                    )
+        super().__init__(
+            model=model,
+            args=args,
+            data_collator=data_collator,
+            train_dataset=train_dataset,
+            eval_dataset=eval_dataset,
+            processing_class=processing_class,
+            model_init=model_init,
+            compute_metrics=compute_metrics,
+            callbacks=callbacks,
+            optimizers=optimizers,
+            preprocess_logits_for_metrics=preprocess_logits_for_metrics,
+        )
+        # Add tags for models that have been loaded with the correct transformers version
+        if hasattr(self.model, "add_model_tags"):
+            self.model.add_model_tags(self._tag_names)
+    @staticmethod
+    def tokenize_row(
+        features,
+        tokenizer,
+        step_separator,
+        max_length,
+        max_prompt_length,
+        max_completion_length,
+        train_on_last_step_only,
+        is_eval,
+    ):
+        r"""
+        Tokenize a row of the dataset.
+        Args:
+            features (`dict[str, str]`):
+                Row of the dataset, should contain the keys `"prompt"`, `"completions"`, and `"labels"`.
+            tokenizer ([`~transformers.PreTrainedTokenizerBase`]):
+                Tokenizer used to process the data.
+            step_separator (`str`):
+                Separator between steps in the completion.
+            max_length (`int` or `None`):
+               Maximum length of the sequences (prompt + completion). If `None`, the sequences are not truncated.
+            max_prompt_length (`int` or `None`):
+                Maximum length of the prompt. If `None`, the prompt is not truncated.
+            max_completion_length (`int` or `None`):
+                Maximum length of the completion sequences. If `None`, the completion sequences are not truncated.
+            train_on_last_step_only (`bool`):
+                Whether to train only on the last step. If `True`, the labels are `-100` for all tokens except the last
+                token of the completion.
+            is_eval (`bool`):
+                Whether the function is used to tokenize samples from a training or an evaluation dataset. Used only if
+                `train_on_last_step_only` is set to `True`.
+        Returns:
+            `dict[str, list[int]]`:
+                Tokenized sequences with the keys `"input_ids"`, and `"labels".
+        Example:
+        ```python
+        >>> from transformers import AutoTokenizer
+        >>> tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B")
+        >>> features = {
+        ...     "prompt": "Which number is larger, 9.8 or 9.11?",
+        ...     "completions": ["11 is greater than 8.", "Hence, 9.11 > 9.8."],
+        ...     "labels": [True, False],
+        ... }
+        >>> PRMTrainer.tokenize_row(
+        ...     features, tokenizer, "\n", max_completion_length=None, train_on_last_step_only=False, is_eval=False
+        ... )
+        {'input_ids': [23085, 1372, 374, 8131, 11, 220, 24, 13, 23, 476, 220, 24, 13, 16, 16, 30, 16, 16, 374, 7046, 1091, 220, 23, 13, 198, 39, 763, 11, 220, 24, 13, 16, 16, 861, 220, 24, 13, 23, 13, 198],
+         'labels': [-100, -100, -100, -100, -100, -100, -100, -100, 1, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 0]}
+        ```
+        """
+        # Tokenize the prompt and completions
+        prompt_ids = tokenizer(features["prompt"], add_special_tokens=False)["input_ids"]
+        completions_ids = [
+            tokenizer(completion, add_special_tokens=False)["input_ids"] for completion in features["completions"]
+        ]
+        if train_on_last_step_only and not is_eval:
+            labels = [-100] * (len(features["labels"]) - 1) + [int(features["labels"][-1])]
+        else:
+            labels = [int(label) for label in features["labels"]]
+        # Get the ID of the separator token and add it to the completions
+        separator_ids = tokenizer.encode(step_separator, add_special_tokens=False)
+        completions_ids = [completion + separator_ids for completion in completions_ids]
+        # Create the label
+        labels = [[-100] * (len(completion) - 1) + [label] for completion, label in zip(completions_ids, labels)]
+        # Join the completions and labels steps
+        completion_ids = list(chain(*completions_ids))
+        labels = list(chain(*labels))
+        if tokenizer.bos_token_id is not None:
+            prompt_ids = [tokenizer.bos_token_id] + prompt_ids
+        # Truncate prompt and completion sequences
+        if max_prompt_length is not None:
+            prompt_ids = prompt_ids[-max_prompt_length:]
+        if max_completion_length is not None:
+            completion_ids = completion_ids[:max_completion_length]
+            labels = labels[:max_completion_length]
+        input_ids = prompt_ids + completion_ids
+        labels = [-100] * len(prompt_ids) + labels
+        if max_length is not None:
+            input_ids = input_ids[:max_length]
+            labels = labels[:max_length]
+        return {"input_ids": input_ids, "labels": labels}
+    # Ensure the model card is saved along with the checkpoint
+    def _save_checkpoint(self, model, trial):
+        if self.args.hub_model_id is None:
+            model_name = Path(self.args.output_dir).name
+        else:
+            model_name = self.args.hub_model_id.split("/")[-1]
+        self.create_model_card(model_name=model_name)
+        super()._save_checkpoint(model, trial)
+class UnslothPRMTrainer(_UnslothPRMTrainer):
+    """
+    Initialize PRMTrainer.
+    Args:
+        model ([`~transformers.PreTrainedModel`]):
+            The model to train, preferably an `AutoModelForTokenClassification`.
+        args ([`PRMConfig`]):
+            The arguments to use for training.
+        data_collator ([`~transformers.DataCollator`]):
+            The data collator to use for training. If None is specified, the default data collator
+            ([`~transformers.DataCollatorForTokenClassification`]) will be used which will pad the sequences to the
+            maximum length of the sequences in the batch, given a dataset of paired sequences.
+        train_dataset ([`~datasets.Dataset`]):
+            The dataset to use for training.
+        eval_dataset ([`~datasets.Dataset`]):
+            The dataset to use for evaluation.
+        processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
+            Processing class used to process the data. If provided, will be used to automatically process the inputs
+            for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
+            reuse the fine-tuned model.
+        model_init (`Callable[[], transformers.PreTrainedModel]`):
+            The model initializer to use for training. If None is specified, the default model initializer will be
+            used.
+        compute_metrics (`Callable[[transformers.EvalPrediction], dict]`, *optional* defaults to `compute_accuracy`):
+            The metrics to use for evaluation. If no metrics are specified, the default metric (`compute_accuracy`)
+            will be used.
+        callbacks (`list[transformers.TrainerCallback]`):
+            The callbacks to use for training.
+        optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
+            The optimizer and scheduler to use for training.
+        preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
+            The function to use to preprocess the logits before computing the metrics.
+        peft_config (`dict`, defaults to `None`):
+            The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
+            a PEFT model.
+    """
+    def __init__(
+        self,
+        model = None,
+        args = None,
+        data_collator = None,
+        train_dataset = None,
+        eval_dataset = None,
+        processing_class = None,
+        model_init = None,
+        compute_metrics = None,
+        callbacks = None,
+        preprocess_logits_for_metrics = None,
+        peft_config = None,
+        **kwargs
+    ):
+        if args is None: args = UnslothPRMConfig()
+        use_bf16 = getattr(args, 'bf16', False)
+        if type(use_bf16) is not bool: use_bf16 = False
+        use_fp16 = getattr(args, 'fp16', False)
+        if type(use_fp16) is not bool: use_fp16 = False
+        force_float32 = False
+        full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1'
+        if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'):
+            print('Unsloth: Switching to float32 training since model cannot work with float16')
+            force_float32 = True
+        mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
+        dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
+        if dtype is None: dtype = model.get_input_embeddings().weight.dtype
+        from unsloth_zoo.utils import _get_dtype
+        dtype = _get_dtype(dtype)
+        float16 = dtype == torch.float16
+        if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`')
+        if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`')
+        if force_float32:
+            # Forced float32 training
+            args.fp16 = False
+            args.bf16 = False
+            os.environ['ACCELERATE_MIXED_PRECISION'] = 'no'
+            if hasattr(args, 'mixed_precision'): args.mixed_precision = 'no'
+            # args.mixed_precision is a new argument which needs to be set now
+        elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32':
+            # Mixed precision training
+            args.fp16 = float16
+            args.bf16 = not float16
+            os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16'
+            if hasattr(args, 'mixed_precision'): args.mixed_precision = 'fp16' if float16 else 'bf16'
+            # args.mixed_precision is a new argument which needs to be set now
+        elif mixed_precision_dtype == 'bfloat16':
+            # Both False since bfloat16 full finetuning doesn't do any autocasting.
+            args.fp16 = False
+            args.bf16 = False
+            os.environ['ACCELERATE_MIXED_PRECISION'] = 'no'
+            if hasattr(args, 'mixed_precision'): args.mixed_precision = 'no'
+            # args.mixed_precision is a new argument which needs to be set now
+        if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no':
+            args.eval_strategy = 'steps'
+            if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1
+        ga_steps = getattr(args, 'gradient_accumulation_steps', None)
+        if ga_steps is not None and ga_steps > 1:
+            from transformers import __version__ as transformers_version
+            if Version(transformers_version) <= Version('4.45.2'):
+                print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n'
+                      '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`')
+        if getattr(args, 'eval_strategy', 'no') != 'no':
+            eval_bsz = getattr(args, 'per_device_eval_batch_size', 8)
+            if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size
+            if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps
+        fp16_full_eval = getattr(args, 'fp16_full_eval', False)
+        if type(fp16_full_eval) is not bool: fp16_full_eval = False
+        bf16_full_eval = getattr(args, 'bf16_full_eval', False)
+        if type(bf16_full_eval) is not bool: bf16_full_eval = False
+        if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True
+        if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False
+        if force_float32:
+            args.bf16_full_eval = False
+            args.fp16_full_eval = False
+        elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16':
+            args.bf16_full_eval = True
+            args.fp16_full_eval = False
+        elif not bf16_full_eval and not fp16_full_eval:
+            args.bf16_full_eval = args.bf16
+            args.fp16_full_eval = args.fp16
+        _output_logits = False
+        if locals().get('compute_metrics', None) is not None: _output_logits = True
+        if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True
+        if _output_logits:
+            os.environ['UNSLOTH_RETURN_LOGITS'] = '1'
+        if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'):
+            pass
+        else:
+            model_max_seq_length = getattr(model, 'max_seq_length', None)
+            args_max_seq_length  = getattr(args,  'max_seq_length', None)
+            if args_max_seq_length is None and model_max_seq_length is not None:
+                max_seq_length = model.max_seq_length
+                if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length
+        if model is not None and hasattr(model, 'for_training'):
+            model.for_training(use_gradient_checkpointing=getattr(args, 'gradient_checkpointing', True))
+        if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right'
+        if 'processing_class' in locals():
+            if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right'
+            if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right'
+        __tokenizer = processing_class if 'processing_class' in locals() else tokenizer
+        from unsloth_zoo.vision_utils import UnslothVisionDataCollator
+        if not isinstance(data_collator, UnslothVisionDataCollator):
+            if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names:
+                data_collator = TransformersDataCollatorForLanguageModeling(
+                    __tokenizer,
+                    mlm = False,
+                    mlm_probability = 0.0,
+                    pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
+                )
+            elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names:
+                data_collator = DataCollatorForSeq2Seq(
+                    __tokenizer,
+                    pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
+                )
+        else:
+            if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False
+            if hasattr(args, 'dataset_text_field'): args.dataset_text_field = ''
+            if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True}
+        if not isinstance(data_collator, UnslothVisionDataCollator):
+            if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'):
+                if isinstance(data_collator, DataCollatorForSeq2Seq):
+                    data_collator = DataCollatorForSeq2Seq(
+                        __tokenizer.tokenizer,
+                        pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
+                    )
+                else:
+                    data_collator = TransformersDataCollatorForLanguageModeling(
+                        __tokenizer.tokenizer,
+                        mlm = False,
+                        mlm_probability = 0.0,
+                        pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None),
+                    )
+        other_metrics = []
+        from unsloth_zoo.logging_utils import PatchRLStatistics
+        PatchRLStatistics('prm_trainer', other_metrics)
+        # [TODO] Fix up DataParallel multiplying batch sizes
+        # [TODO] DDP works, but DP seems to not work? [TODO]
+        if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1:
+            if getattr(args, "_n_gpu", 1) != 1:
+                args._n_gpu = 1
+        if "model" in locals() and hasattr(model, "for_training"):
+            model.for_training(use_gradient_checkpointing=getattr(args, 'gradient_checkpointing', True))
+        super().__init__(
+            model = model,
+            args = args,
+            data_collator = data_collator,
+            train_dataset = train_dataset,
+            eval_dataset = eval_dataset,
+            processing_class = processing_class,
+            model_init = model_init,
+            compute_metrics = compute_metrics,
+            callbacks = callbacks,
+            preprocess_logits_for_metrics = preprocess_logits_for_metrics,
+            peft_config = peft_config,**kwargs)
+        if "model" in locals() and hasattr(model, "for_inference"):
+            model.for_inference()
+        if hasattr(self, 'neftune_hook_handle'):
+            self.neftune_hook_handle.remove()
+            if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle
+        if getattr(args, 'neftune_noise_alpha', None) is not None:
+            model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha
+        pass
+        if hasattr(self, 'accelerator'):
+            scaler = self.accelerator.scaler
+            current_model = model
+            while hasattr(current_model, 'model'):
+                current_model.accelerator_scaler = scaler
+                current_model = current_model.model
+            current_model.accelerator_scaler = scaler
+        pass
+        if hasattr(self, 'train'):
+            self.train = MethodType(prepare_for_training_mode(self.__class__.train), self)
+        pass
+pass