nextrec-0.4.24-py3-none-any.whl → nextrec-0.4.27-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. nextrec/__version__.py +1 -1
  2. nextrec/basic/asserts.py +72 -0
  3. nextrec/basic/loggers.py +18 -1
  4. nextrec/basic/model.py +191 -71
  5. nextrec/basic/summary.py +58 -0
  6. nextrec/cli.py +13 -0
  7. nextrec/data/data_processing.py +3 -9
  8. nextrec/data/dataloader.py +25 -2
  9. nextrec/data/preprocessor.py +283 -36
  10. nextrec/models/multi_task/[pre]aitm.py +173 -0
  11. nextrec/models/multi_task/[pre]snr_trans.py +232 -0
  12. nextrec/models/multi_task/[pre]star.py +192 -0
  13. nextrec/models/multi_task/apg.py +330 -0
  14. nextrec/models/multi_task/cross_stitch.py +229 -0
  15. nextrec/models/multi_task/escm.py +290 -0
  16. nextrec/models/multi_task/esmm.py +8 -21
  17. nextrec/models/multi_task/hmoe.py +203 -0
  18. nextrec/models/multi_task/mmoe.py +20 -28
  19. nextrec/models/multi_task/pepnet.py +68 -66
  20. nextrec/models/multi_task/ple.py +30 -44
  21. nextrec/models/multi_task/poso.py +13 -22
  22. nextrec/models/multi_task/share_bottom.py +14 -25
  23. nextrec/models/ranking/afm.py +2 -2
  24. nextrec/models/ranking/autoint.py +2 -4
  25. nextrec/models/ranking/dcn.py +2 -3
  26. nextrec/models/ranking/dcn_v2.py +2 -3
  27. nextrec/models/ranking/deepfm.py +2 -3
  28. nextrec/models/ranking/dien.py +7 -9
  29. nextrec/models/ranking/din.py +8 -10
  30. nextrec/models/ranking/eulernet.py +1 -2
  31. nextrec/models/ranking/ffm.py +1 -2
  32. nextrec/models/ranking/fibinet.py +2 -3
  33. nextrec/models/ranking/fm.py +1 -1
  34. nextrec/models/ranking/lr.py +1 -1
  35. nextrec/models/ranking/masknet.py +1 -2
  36. nextrec/models/ranking/pnn.py +1 -2
  37. nextrec/models/ranking/widedeep.py +2 -3
  38. nextrec/models/ranking/xdeepfm.py +2 -4
  39. nextrec/models/representation/rqvae.py +4 -4
  40. nextrec/models/retrieval/dssm.py +18 -26
  41. nextrec/models/retrieval/dssm_v2.py +15 -22
  42. nextrec/models/retrieval/mind.py +9 -15
  43. nextrec/models/retrieval/sdm.py +36 -33
  44. nextrec/models/retrieval/youtube_dnn.py +16 -24
  45. nextrec/models/sequential/hstu.py +2 -2
  46. nextrec/utils/__init__.py +5 -1
  47. nextrec/utils/config.py +2 -0
  48. nextrec/utils/model.py +16 -77
  49. nextrec/utils/torch_utils.py +11 -0
  50. {nextrec-0.4.24.dist-info → nextrec-0.4.27.dist-info}/METADATA +72 -62
  51. nextrec-0.4.27.dist-info/RECORD +90 -0
  52. nextrec/models/multi_task/aitm.py +0 -0
  53. nextrec/models/multi_task/snr_trans.py +0 -0
  54. nextrec-0.4.24.dist-info/RECORD +0 -86
  55. {nextrec-0.4.24.dist-info → nextrec-0.4.27.dist-info}/WHEEL +0 -0
  56. {nextrec-0.4.24.dist-info → nextrec-0.4.27.dist-info}/entry_points.txt +0 -0
  57. {nextrec-0.4.24.dist-info → nextrec-0.4.27.dist-info}/licenses/LICENSE +0 -0
nextrec/basic/model.py CHANGED
@@ -2,7 +2,7 @@
 Base Model & Base Match Model Class
 
 Date: create on 27/10/2025
-Checkpoint: edit on 30/12/2025
+Checkpoint: edit on 01/01/2026
 Author: Yang Zhou,zyaztec@gmail.com
 """
 
@@ -36,6 +36,7 @@ from torch.utils.data import DataLoader
 from torch.utils.data.distributed import DistributedSampler
 
 from nextrec import __version__
+from nextrec.basic.asserts import assert_task
 from nextrec.basic.callback import (
     CallbackList,
     CheckpointSaver,
@@ -88,9 +89,8 @@ from nextrec.utils.config import safe_value
 from nextrec.utils.model import (
     compute_ranking_loss,
     get_loss_list,
-    resolve_loss_weights,
-    get_training_modes,
 )
+
 from nextrec.utils.types import (
     LossName,
     OptimizerName,
@@ -100,6 +100,8 @@ from nextrec.utils.types import (
     MetricsName,
 )
 
+from nextrec.utils.data import FILE_FORMAT_CONFIG
+
 
 class BaseModel(SummarySet, FeatureSet, nn.Module):
     @property
@@ -118,7 +120,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
         target: list[str] | str | None = None,
         id_columns: list[str] | str | None = None,
         task: TaskTypeName | list[TaskTypeName] | None = None,
-        training_mode: TrainingModeName | list[TrainingModeName] = "pointwise",
+        training_mode: TrainingModeName | list[TrainingModeName] | None = None,
         embedding_l1_reg: float = 0.0,
         dense_l1_reg: float = 0.0,
         embedding_l2_reg: float = 0.0,
@@ -138,10 +140,10 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
             dense_features: DenseFeature definitions.
             sparse_features: SparseFeature definitions.
             sequence_features: SequenceFeature definitions.
-            target: Target column name. e.g., 'label' or ['label1', 'label2'].
+            target: Target column name. e.g., 'label_ctr' or ['label_ctr', 'label_cvr'].
             id_columns: Identifier column name, only need to specify if GAUC is required. e.g., 'user_id'.
             task: Task types, e.g., 'binary', 'regression', or ['binary', 'regression']. If None, falls back to self.default_task.
-            training_mode: Training mode for ranking tasks; a single mode or a list per task.
+            training_mode: Training mode for different tasks. e.g., 'pointwise', ['pointwise', 'pairwise'].
 
             embedding_l1_reg: L1 regularization strength for embedding params. e.g., 1e-6.
             dense_l1_reg: L1 regularization strength for dense params. e.g., 1e-5.
@@ -193,10 +195,12 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
 
         self.task = task or self.default_task
         self.nums_task = len(self.task) if isinstance(self.task, list) else 1
-        self.training_modes = get_training_modes(training_mode, self.nums_task)
-        self.training_mode = (
-            self.training_modes if self.nums_task > 1 else self.training_modes[0]
-        )
+
+        training_mode = training_mode or "pointwise"
+        if isinstance(training_mode, list):
+            self.training_modes = list(training_mode)
+        else:
+            self.training_modes = [training_mode] * self.nums_task
 
         self.embedding_l1_reg = embedding_l1_reg
         self.dense_l1_reg = dense_l1_reg
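Note on the training_mode change above: the default moves from the literal "pointwise" to None (normalized back to "pointwise" internally), and a single string is now broadcast to one mode per task. A minimal runnable sketch of the new normalization, assuming a hypothetical two-task setup (only the normalization logic is taken from the diff):

    # Hypothetical two-task setup; the normalization mirrors the hunk above.
    nums_task = 2                                 # e.g., task=["binary", "binary"]
    training_mode = None                          # new default
    training_mode = training_mode or "pointwise"
    if isinstance(training_mode, list):
        training_modes = list(training_mode)      # per-task modes, e.g. ["pointwise", "pairwise"]
    else:
        training_modes = [training_mode] * nums_task  # broadcast a single mode
    assert training_modes == ["pointwise", "pointwise"]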
@@ -215,6 +219,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
 
         self.train_data_summary = None
         self.valid_data_summary = None
+        self.note = None
 
     def register_regularization_weights(
         self,
@@ -222,6 +227,15 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
         exclude_modules: list[str] | None = None,
         include_modules: list[str] | None = None,
     ):
+        """
+        Register parameters for regularization.
+        By default, all nn.Linear weights (excluding those in BatchNorm/Dropout layers) and embedding weights under `embedding_attr` are registered.
+
+        Args:
+            embedding_attr: Attribute name of the embedding layer/module.
+            exclude_modules: List of module name substrings to exclude from regularization.
+            include_modules: List of module name substrings to include for regularization. If provided, only modules containing these substrings are included.
+        """
         exclude_modules = exclude_modules or []
         include_modules = include_modules or []
         embedding_layer = getattr(self, embedding_attr, None)
@@ -268,6 +282,9 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
                 existing_reg_ids.add(id(module.weight))
 
     def add_reg_loss(self) -> torch.Tensor:
+        """
+        Compute the regularization loss based on registered parameters and their respective regularization strengths.
+        """
         reg_loss = torch.tensor(0.0, device=self.device)
 
         if self.embedding_l1_reg > 0:
@@ -289,9 +306,25 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
             )
         return reg_loss
 
+    # todo: support build pairwise/listwise label in input
     def get_input(self, input_data: dict, require_labels: bool = True):
+        """
+        Prepare unified input features and labels from the given input data.
+
+
+        Args:
+            input_data: Input data dictionary containing 'features' and optionally 'labels', e.g., {'features': {'feat1': [...], 'feat2': [...]}, 'labels': {'label': [...]}}.
+            require_labels: Whether labels are required in the input data. Default is True: for training and evaluation with labels.
+
+        Note:
+            target tensor shape will always be (batch_size, num_targets)
+        """
         feature_source = input_data.get("features", {})
+        # todo: pairwise/listwise label support
+        # "labels": {...} should contain pointwise/pair index/list index/ relevance scores
+        # now only have pointwise label support
         label_source = input_data.get("labels")
+
         X_input = {}
         for feature in self.all_features:
             if feature.name not in feature_source:
@@ -307,13 +340,14 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
                 device=self.device,
             )
         y = None
+        # if need labels: training or eval with labels
         if len(self.target_columns) > 0 and (
             require_labels
             or (
                 label_source
                 and any(name in label_source for name in self.target_columns)
             )
-        ):  # need labels: training or eval with labels
+        ):
             target_tensors = []
             for target_name in self.target_columns:
                 if label_source is None or target_name not in label_source:
@@ -358,6 +392,10 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
         This function will split training data into training and validation sets when:
         1. valid_data is None;
         2. valid_split is provided.
+
+        Returns:
+            train_loader: DataLoader for training data.
+            valid_split_data: Validation data dict/dataframe split from training data.
         """
         if not (0 < valid_split < 1):
             raise ValueError(
@@ -375,7 +413,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
             )
         else:
             raise TypeError(
-                f"[BaseModel-validation Error] If you want to use valid_split, train_data must be a pandas DataFrame or a dict instead of {type(train_data)}"
+                f"[BaseModel-validation Error] If you want to use valid_split, train_data must be DataFrame or a dict, now got {type(train_data)}"
             )
         rng = np.random.default_rng(42)
         indices = rng.permutation(total_length)
@@ -426,7 +464,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
         Args:
             optimizer: Optimizer name or instance. e.g., 'adam', 'sgd', or torch.optim.Adam().
             optimizer_params: Optimizer parameters. e.g., {'lr': 1e-3, 'weight_decay': 1e-5}.
-            scheduler: Learning rate scheduler name or instance. e.g., 'step_lr', 'cosine_annealing', or torch.optim.lr_scheduler.StepLR().
+            scheduler: Learning rate scheduler name or instance. e.g., 'step', 'cosine', or torch.optim.lr_scheduler.StepLR().
             scheduler_params: Scheduler parameters. e.g., {'step_size': 10, 'gamma': 0.1}.
             loss: Loss function name, instance, or list for multi-task. e.g., 'bce', 'mse', or torch.nn.BCELoss(), you can also use custom loss functions.
             loss_params: Loss function parameters, or list for multi-task. e.g., {'weight': tensor([0.25, 0.75])}.
@@ -435,36 +473,31 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
             ignore_label: Label value to ignore when computing loss. Use this to skip gradients for unknown labels.
         """
         self.ignore_label = ignore_label
-        default_losses = {
-            "pointwise": "bce",
-            "pairwise": "bpr",
-            "listwise": "listnet",
-        }
-        loss_list = get_loss_list(
-            loss, self.training_modes, self.nums_task, default_losses
-        )
-        self.loss_params = loss_params or {}
-        optimizer_params = optimizer_params or {}
+
+        # get loss list
+        loss_list = get_loss_list(loss, self.training_modes, self.nums_task)
+
+        self.loss_params = {} if loss_params is None else loss_params
+        self.optimizer_params = optimizer_params or {}
+        self.scheduler_params = scheduler_params or {}
+
         self.optimizer_name = (
             optimizer if isinstance(optimizer, str) else optimizer.__class__.__name__
         )
-        self.optimizer_params = optimizer_params
         self.optimizer_fn = get_optimizer(
             optimizer=optimizer,
             params=self.parameters(),
-            **optimizer_params,
+            **self.optimizer_params,
         )
 
-        scheduler_params = scheduler_params or {}
         if scheduler is None:
             self.scheduler_name = None
         elif isinstance(scheduler, str):
             self.scheduler_name = scheduler
         else:
             self.scheduler_name = getattr(scheduler, "__name__", scheduler.__class__.__name__)  # type: ignore
-        self.scheduler_params = scheduler_params
         self.scheduler_fn = (
-            get_scheduler(scheduler, self.optimizer_fn, **scheduler_params)
+            get_scheduler(scheduler, self.optimizer_fn, **self.scheduler_params)
             if scheduler
             else None
         )
@@ -482,35 +515,56 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
             for i in range(self.nums_task)
         ]
 
+        # loss weighting (grad norm or fixed weights)
         self.grad_norm = None
         self.grad_norm_shared_params = None
-        if isinstance(loss_weights, str) and loss_weights.lower() == "grad_norm":
+        is_grad_norm = (
+            loss_weights == "grad_norm"
+            or isinstance(loss_weights, dict)
+            and loss_weights.get("method") == "grad_norm"
+        )
+        if is_grad_norm:
             if self.nums_task == 1:
                 raise ValueError(
                     "[BaseModel-compile Error] GradNorm requires multi-task setup."
                 )
-            self.grad_norm = GradNormLossWeighting(
-                nums_task=self.nums_task, device=self.device
+            grad_norm_params = (
+                dict(loss_weights) if isinstance(loss_weights, dict) else {}
             )
-            self.loss_weights = None
-        elif (
-            isinstance(loss_weights, dict) and loss_weights.get("method") == "grad_norm"
-        ):
-            if self.nums_task == 1:
-                raise ValueError(
-                    "[BaseModel-compile Error] GradNorm requires multi-task setup."
-                )
-            grad_norm_params = dict(loss_weights)
             grad_norm_params.pop("method", None)
             self.grad_norm = GradNormLossWeighting(
                 nums_task=self.nums_task, device=self.device, **grad_norm_params
             )
             self.loss_weights = None
+        elif loss_weights is None:
+            self.loss_weights = None
+        elif self.nums_task == 1:
+            if isinstance(loss_weights, (list, tuple)):
+                if len(loss_weights) != 1:
+                    raise ValueError(
+                        "[BaseModel-compile Error] loss_weights list must have exactly one element for single-task setup."
+                    )
+                loss_weights = loss_weights[0]
+            self.loss_weights = [float(loss_weights)]
+        elif isinstance(loss_weights, (int, float)):
+            self.loss_weights = [float(loss_weights)] * self.nums_task
+        elif isinstance(loss_weights, (list, tuple)):
+            weights = [float(w) for w in loss_weights]
+            if len(weights) != self.nums_task:
+                raise ValueError(
+                    f"[BaseModel-compile Error] Number of loss_weights ({len(weights)}) must match number of tasks ({self.nums_task})."
+                )
+            self.loss_weights = weights
         else:
-            self.loss_weights = resolve_loss_weights(loss_weights, self.nums_task)
+            raise TypeError(
+                f"[BaseModel-compile Error] loss_weights must be int, float, list or tuple, got {type(loss_weights)}"
+            )
         self.compiled = True
 
     def compute_loss(self, y_pred, y_true):
+        """
+        Compute the loss between predictions and ground truth labels, with loss weighting and ignore_label handling
+        """
         if y_true is None:
             raise ValueError(
                 "[BaseModel-compute_loss Error] Ground truth labels (y_true) are required."
@@ -522,13 +576,11 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
             y_pred = y_pred.view(-1, 1)
         if y_true.dim() == 1:
             y_true = y_true.view(-1, 1)
-        if y_pred.shape != y_true.shape:
-            raise ValueError(
-                f"[BaseModel-compute_loss Error] Shape mismatch: {y_pred.shape} vs {y_true.shape}"
-            )
 
         loss_fn = self.loss_fn[0]
 
+        # mask ignored labels
+        # we don't suggest using ignore_label for single task training
         if self.ignore_label is not None:
             valid_mask = y_true != self.ignore_label
             if valid_mask.dim() > 1:
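With the shape-mismatch guard removed, the ignore_label mask is the main safeguard left in the single-task path. A runnable sketch of what that mask does, with made-up tensors:

    import torch

    # Rows whose label equals ignore_label are excluded from the loss (from the hunk above);
    # the example values are illustrative.
    ignore_label = -1
    y_true = torch.tensor([[1.0], [-1.0], [0.0]])  # -1 marks an unknown label
    valid_mask = y_true != ignore_label            # only rows with real labels keep gradients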
@@ -559,9 +611,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
             loss *= self.loss_weights[0]
             return loss
 
-        # multi-task
-        if y_pred.shape != y_true.shape:
-            raise ValueError(f"Shape mismatch: {y_pred.shape} vs {y_true.shape}")
+        # multi-task: slice predictions and labels per task
         slices = (
             self.prediction_layer.task_slices  # type: ignore
             if hasattr(self, "prediction_layer")
@@ -593,9 +643,9 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
                 )
             else:
                 task_loss = self.loss_fn[i](y_pred_i, y_true_i)
-                # task_loss = normalize_task_loss(
-                #     task_loss, valid_count, total_count
-                # ) # normalize by valid samples to avoid loss scale issues
+            # task_loss = normalize_task_loss(
+            #     task_loss, valid_count, total_count
+            # ) # normalize by valid samples to avoid loss scale issues
             task_losses.append(task_loss)
 
         if self.grad_norm is not None:
@@ -619,11 +669,23 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
         batch_size: int = 32,
         shuffle: bool = True,
         num_workers: int = 0,
+        prefetch_factor: int | None = None,
         sampler=None,
         return_dataset: bool = False,
     ):
         """
         Prepare a DataLoader from input data. Only used when input data is not a DataLoader.
+
+        Args:
+            data: Input data (dict/df/DataLoader).
+            batch_size: Batch size.
+            shuffle: Whether to shuffle the data (ignored when a sampler is provided).
+            num_workers: Number of DataLoader workers.
+            prefetch_factor: Number of batches loaded in advance by each worker.
+            sampler: Optional sampler for DataLoader.
+            return_dataset: Whether to return the tensor dataset along with the DataLoader, used for valid data
+        Returns:
+            DataLoader (and tensor dataset if return_dataset is True).
         """
         if isinstance(data, DataLoader):
             return (data, None) if return_dataset else data
@@ -639,6 +701,9 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
                 "[BaseModel-prepare_data_loader Error] No data available to create DataLoader."
             )
         dataset = TensorDictDataset(tensors)
+        loader_kwargs = {}
+        if num_workers > 0 and prefetch_factor is not None:
+            loader_kwargs["prefetch_factor"] = prefetch_factor
        loader = DataLoader(
             dataset,
             batch_size=batch_size,
@@ -648,6 +713,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
             num_workers=num_workers,
             pin_memory=self.device.type == "cuda",
             persistent_workers=num_workers > 0,
+            **loader_kwargs,
         )
         return (loader, dataset) if return_dataset else loader
 
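The loader_kwargs guard above mirrors PyTorch's DataLoader rule that prefetch_factor is only legal with worker processes; passing it with num_workers=0 raises a ValueError. A runnable sketch with illustrative values:

    # prefetch_factor is forwarded only when workers exist (logic from the hunk above).
    num_workers, prefetch_factor = 4, 2   # prefetch 2 batches per worker
    loader_kwargs = {}
    if num_workers > 0 and prefetch_factor is not None:
        loader_kwargs["prefetch_factor"] = prefetch_factor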
676
742
  swanlab_kwargs: dict | None = None,
677
743
  auto_ddp_sampler: bool = True,
678
744
  log_interval: int = 1,
745
+ note: str | None = None,
679
746
  summary_sections: (
680
747
  list[Literal["feature", "model", "train", "data"]] | None
681
748
  ) = None,
@@ -707,6 +774,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
707
774
  swanlab_kwargs: Optional kwargs for swanlab.init(...).
708
775
  auto_ddp_sampler: Attach DistributedSampler automatically when distributed, set False to when data is already sharded per rank.
709
776
  log_interval: Log validation metrics every N epochs (still computes metrics each epoch).
777
+ note: Optional note for the training run.
710
778
  summary_sections: Optional summary sections to print. Choose from
711
779
  ["feature", "model", "train", "data"]. Defaults to all.
712
780
 
@@ -720,6 +788,8 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
720
788
  )
721
789
  self.to(self.device)
722
790
 
791
+ assert_task(self.task, len(self.target_columns), model_name=self.model_name)
792
+
723
793
  if not self.compiled:
724
794
  self.compile(
725
795
  optimizer="adam",
@@ -770,11 +840,13 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
770
840
  self.metrics_sample_limit = (
771
841
  None if metrics_sample_limit is None else int(metrics_sample_limit)
772
842
  )
843
+ self.note = note
773
844
 
774
845
  training_config = {}
775
846
  if self.is_main_process:
776
847
  training_config = {
777
848
  "model_name": getattr(self, "model_name", self.__class__.__name__),
849
+ "note": self.note,
778
850
  "task": self.task,
779
851
  "target_columns": self.target_columns,
780
852
  "batch_size": batch_size,
@@ -822,6 +894,14 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
822
894
  else:
823
895
  swanlab.login(api_key=swanlab_api)
824
896
 
897
+ if use_wandb and self.note:
898
+ wandb_kwargs = dict(wandb_kwargs or {})
899
+ wandb_kwargs.setdefault("notes", self.note)
900
+
901
+ if use_swanlab and self.note:
902
+ swanlab_kwargs = dict(swanlab_kwargs or {})
903
+ swanlab_kwargs.setdefault("description", self.note)
904
+
825
905
  self.training_logger = (
826
906
  TrainingLogger(
827
907
  session=self.session,
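Per the hunks above, the new note is stored on the model, written into training_config, and forwarded to wandb ("notes") and swanlab ("description") unless the caller already set those keys. A hedged sketch (train_df and any fit arguments beyond the diffed parameters are assumptions):

    # Illustrative fit() call; `note` and `use_wandb` appear in the diff above.
    model.fit(
        train_data=train_df,          # hypothetical DataFrame
        note="baseline, lr=1e-3",     # surfaces in training_config, wandb notes, swanlab description
        use_wandb=True,
    )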
@@ -1253,7 +1333,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
         for batch_index, batch_data in batch_iter:
             batch_dict = batch_to_dict(batch_data)
             X_input, y_true = self.get_input(batch_dict, require_labels=True)
-            # call via __call__ so DDP hooks run (no grad sync if calling .forward directly)
+            # call via __call__ so DDP hooks run
             y_pred = model(X_input)  # type: ignore
 
             loss = self.compute_loss(y_pred, y_true)
@@ -1556,7 +1636,6 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
         num_workers: int = 0,
     ) -> pd.DataFrame | np.ndarray | Path | None:
         """
-        Note: predict does not support distributed mode currently, consider it as a single-process operation.
         Make predictions on the given data.
 
         Args:
@@ -1569,6 +1648,9 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
             return_dataframe: Whether to return predictions as a pandas DataFrame; if False, returns a NumPy array.
             stream_chunk_size: Number of rows per chunk when using streaming mode for large datasets.
             num_workers: DataLoader worker count.
+
+        Note:
+            predict does not support distributed mode currently, consider it as a single-process operation.
         """
         self.eval()
         # Use prediction-time id_columns if provided, otherwise fall back to model's id_columns
@@ -1753,6 +1835,21 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
         return_dataframe: bool,
         id_columns: list[str] | None = None,
     ):
+        """
+        Make predictions on the given data using streaming mode for large datasets.
+
+        Args:
+            data: Input data for prediction (file path, dict, DataFrame, or DataLoader).
+            batch_size: Batch size for prediction.
+            save_path: Path to save predictions.
+            save_format: Format to save predictions ('csv' or 'parquet').
+            include_ids: Whether to include ID columns in the output.
+            stream_chunk_size: Number of rows per chunk when using streaming mode.
+            return_dataframe: Whether to return predictions as a pandas DataFrame.
+            id_columns: Column name(s) to use as IDs; if None, uses model's id_columns.
+        Note:
+            This method uses streaming writes to handle large datasets without loading all data into memory.
+        """
         if isinstance(data, (str, os.PathLike)):
             rec_loader = RecDataLoader(
                 dense_features=self.dense_features,
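Based on the docstring added above, a hedged sketch of streaming prediction (the paths, and the assumption that the public predict() passes these arguments through to the streaming path, are illustrative):

    # Illustrative streaming predict() call; parameter names come from the docstring above.
    preds = model.predict(
        data="interactions.parquet",  # file input goes through RecDataLoader streaming
        batch_size=4096,
        save_path="preds/run1",
        save_format="parquet",        # csv/parquet stream; other formats buffer in memory first
        stream_chunk_size=100_000,
    )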
@@ -1795,8 +1892,6 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
                 "Results will be collected in memory and saved at the end. Use csv or parquet for true streaming."
             )
 
-        from nextrec.utils.data import FILE_FORMAT_CONFIG
-
         suffix = FILE_FORMAT_CONFIG[save_format]["extension"][0]
 
         target_path = get_save_path(
@@ -1908,6 +2003,14 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
         add_timestamp: bool | None = None,
         verbose: bool = True,
     ):
+        """
+        Save the model state and features configuration to disk.
+
+        Args:
+            save_path: Path to save the model; if None, saves to the session's model directory.
+            add_timestamp: Whether to add a timestamp to the filename; if None, defaults to True.
+            verbose: Whether to log the save location.
+        """
         add_timestamp = False if add_timestamp is None else add_timestamp
         target_path = get_save_path(
             path=save_path,
@@ -1950,6 +2053,14 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
         map_location: str | torch.device | None = "cpu",
         verbose: bool = True,
     ):
+        """
+        Load the model state and features configuration from disk.
+
+        Args:
+            save_path: Path to load the model from; can be a directory or a specific .pt file.
+            map_location: Device mapping for loading the model (e.g., 'cpu', 'cuda:0').
+            verbose: Whether to log the load location.
+        """
         self.to(self.device)
         base_path = Path(save_path)
         if base_path.is_dir():
@@ -2016,6 +2127,13 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
         """
         Load a model from a checkpoint path. The checkpoint path should contain:
         a .pt file and a features_config.pkl file.
+
+        Args:
+            checkpoint_path: Path to the checkpoint directory or specific .pt file.
+            map_location: Device mapping for loading the model (e.g., 'cpu', 'cuda:0').
+            device: Device to place the model on after loading.
+            session_id: Optional session ID for the model.
+            **kwargs: Additional keyword arguments to pass to the model constructor.
         """
         base_path = Path(checkpoint_path)
         verbose = kwargs.pop("verbose", True)
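The new docstrings spell out the persistence contract: save() writes the model state plus a features configuration, load() accepts a directory or a specific .pt file, and from_checkpoint() rebuilds a model from a directory containing a .pt file and features_config.pkl. A hedged round-trip sketch (the class name and paths are illustrative):

    # Illustrative save/load round trip; argument names come from the docstrings above.
    model.save(save_path="artifacts/deepfm", add_timestamp=False)
    model.load("artifacts/deepfm")                            # directory or specific .pt file
    restored = MyModel.from_checkpoint("artifacts/deepfm", map_location="cpu")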
@@ -2135,6 +2253,7 @@ class BaseMatchModel(BaseModel):
             target=target,
             id_columns=id_columns,
             task=task,
+            training_mode=training_mode,
             device=device,
             embedding_l1_reg=embedding_l1_reg,
             dense_l1_reg=dense_l1_reg,
@@ -2157,10 +2276,14 @@ class BaseMatchModel(BaseModel):
         self.item_sparse_features = item_sparse_features
         self.item_sequence_features = item_sequence_features
 
-        self.training_mode = training_mode
         self.num_negative_samples = num_negative_samples
         self.temperature = temperature
         self.similarity_metric = similarity_metric
+        primary_mode = self.training_modes[0] if self.training_modes else "pointwise"
+        if primary_mode not in self.support_training_modes:
+            raise ValueError(
+                f"{self.model_name.upper()} does not support training_mode='{primary_mode}'. Supported modes: {self.support_training_modes}"
+            )
         self.user_features_all = (
             self.user_dense_features
             + self.user_sparse_features
@@ -2176,7 +2299,7 @@ class BaseMatchModel(BaseModel):
         self.head = RetrievalHead(
             similarity_metric=self.similarity_metric,
             temperature=self.temperature,
-            training_mode=self.training_mode,
+            training_mode=primary_mode,
             apply_sigmoid=True,
         )
 
@@ -2209,11 +2332,6 @@ class BaseMatchModel(BaseModel):
             loss_params: Parameters for the loss function(s). e.g., {'reduction': 'mean'}.
             loss_weights: Weights for the loss function(s). e.g., 1.0 or [0.7, 0.3].
         """
-        if self.training_mode not in self.support_training_modes:
-            raise ValueError(
-                f"{self.model_name.upper()} does not support training_mode='{self.training_mode}'. Supported modes: {self.support_training_modes}"
-            )
-
         default_loss_by_mode = {
             "pointwise": "bce",
             "pairwise": "bpr",
@@ -2221,26 +2339,27 @@ class BaseMatchModel(BaseModel):
         }
 
         effective_loss = loss
+        primary_mode = self.training_modes[0] if self.training_modes else "pointwise"
         if effective_loss is None:
-            effective_loss = default_loss_by_mode[self.training_mode]
+            effective_loss = default_loss_by_mode[primary_mode]
         elif isinstance(effective_loss, str):
-            if self.training_mode in {"pairwise", "listwise"} and effective_loss in {
+            if primary_mode in {"pairwise", "listwise"} and effective_loss in {
                 "bce",
                 "binary_crossentropy",
             }:
-                effective_loss = default_loss_by_mode[self.training_mode]
+                effective_loss = default_loss_by_mode[primary_mode]
         elif isinstance(effective_loss, list):
             if not effective_loss:
-                effective_loss = [default_loss_by_mode[self.training_mode]]
+                effective_loss = [default_loss_by_mode[primary_mode]]
             else:
                 first = effective_loss[0]
                 if (
-                    self.training_mode in {"pairwise", "listwise"}
+                    primary_mode in {"pairwise", "listwise"}
                     and isinstance(first, str)
                     and first in {"bce", "binary_crossentropy"}
                 ):
                     effective_loss = [
-                        default_loss_by_mode[self.training_mode],
+                        default_loss_by_mode[primary_mode],
                         *effective_loss[1:],
                     ]
         return super().compile(
@@ -2318,11 +2437,12 @@ class BaseMatchModel(BaseModel):
         return self.head(user_emb, item_emb, similarity_fn=self.compute_similarity)
 
     def compute_loss(self, y_pred, y_true):
-        if self.training_mode == "pointwise":
+        primary_mode = self.training_modes[0] if self.training_modes else "pointwise"
+        if primary_mode == "pointwise":
             return super().compute_loss(y_pred, y_true)
 
         # pairwise / listwise using inbatch neg
-        elif self.training_mode in ["pairwise", "listwise"]:
+        elif primary_mode in ["pairwise", "listwise"]:
             if not isinstance(y_pred, (tuple, list)) or len(y_pred) != 2:
                 raise ValueError(
                     "For pairwise/listwise training, forward should return (user_emb, item_emb). Please check BaseMatchModel.forward implementation."
@@ -2365,7 +2485,7 @@ class BaseMatchModel(BaseModel):
             loss *= float(self.loss_weights[0])
             return loss
         else:
-            raise ValueError(f"Unknown training mode: {self.training_mode}")
+            raise ValueError(f"Unknown training mode: {primary_mode}")
 
     def prepare_feature_data(
         self,
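Net effect of the BaseMatchModel changes: the single self.training_mode attribute is gone; retrieval models now derive a primary mode from self.training_modes[0], validate it against support_training_modes at construction time rather than in compile(), and keep per-mode default losses. A runnable sketch of that default-loss selection (the mapping is from the diff; the surrounding code is illustrative):

    # Default loss follows the primary training mode (mapping from the diff above).
    default_loss_by_mode = {"pointwise": "bce", "pairwise": "bpr", "listwise": "listnet"}
    training_modes = ["pairwise"]
    primary_mode = training_modes[0] if training_modes else "pointwise"
    effective_loss = default_loss_by_mode[primary_mode]
    assert effective_loss == "bpr"   # pairwise retrieval defaults to BPR loss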