replay-rec 0.16.0__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- replay/__init__.py +1 -1
- replay/data/__init__.py +1 -1
- replay/data/dataset.py +45 -42
- replay/data/dataset_utils/dataset_label_encoder.py +6 -7
- replay/data/nn/__init__.py +1 -1
- replay/data/nn/schema.py +20 -33
- replay/data/nn/sequence_tokenizer.py +217 -87
- replay/data/nn/sequential_dataset.py +6 -22
- replay/data/nn/torch_sequential_dataset.py +20 -11
- replay/data/nn/utils.py +7 -9
- replay/data/schema.py +17 -17
- replay/data/spark_schema.py +0 -1
- replay/metrics/base_metric.py +38 -79
- replay/metrics/categorical_diversity.py +24 -58
- replay/metrics/coverage.py +25 -49
- replay/metrics/descriptors.py +4 -13
- replay/metrics/experiment.py +3 -8
- replay/metrics/hitrate.py +3 -6
- replay/metrics/map.py +3 -6
- replay/metrics/mrr.py +1 -4
- replay/metrics/ndcg.py +4 -7
- replay/metrics/novelty.py +10 -29
- replay/metrics/offline_metrics.py +26 -61
- replay/metrics/precision.py +3 -6
- replay/metrics/recall.py +3 -6
- replay/metrics/rocauc.py +7 -10
- replay/metrics/surprisal.py +13 -30
- replay/metrics/torch_metrics_builder.py +0 -4
- replay/metrics/unexpectedness.py +15 -20
- replay/models/__init__.py +1 -2
- replay/models/als.py +7 -15
- replay/models/association_rules.py +12 -28
- replay/models/base_neighbour_rec.py +21 -36
- replay/models/base_rec.py +92 -215
- replay/models/cat_pop_rec.py +9 -22
- replay/models/cluster.py +17 -28
- replay/models/extensions/ann/ann_mixin.py +7 -12
- replay/models/extensions/ann/entities/base_hnsw_param.py +1 -1
- replay/models/extensions/ann/entities/hnswlib_param.py +0 -6
- replay/models/extensions/ann/entities/nmslib_hnsw_param.py +0 -6
- replay/models/extensions/ann/index_builders/driver_hnswlib_index_builder.py +4 -10
- replay/models/extensions/ann/index_builders/driver_nmslib_index_builder.py +7 -11
- replay/models/extensions/ann/index_builders/executor_hnswlib_index_builder.py +5 -12
- replay/models/extensions/ann/index_builders/executor_nmslib_index_builder.py +11 -18
- replay/models/extensions/ann/index_builders/nmslib_index_builder_mixin.py +1 -4
- replay/models/extensions/ann/index_inferers/base_inferer.py +3 -10
- replay/models/extensions/ann/index_inferers/hnswlib_filter_index_inferer.py +7 -17
- replay/models/extensions/ann/index_inferers/hnswlib_index_inferer.py +6 -14
- replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py +14 -28
- replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py +15 -25
- replay/models/extensions/ann/index_inferers/utils.py +2 -9
- replay/models/extensions/ann/index_stores/hdfs_index_store.py +4 -9
- replay/models/extensions/ann/index_stores/shared_disk_index_store.py +2 -6
- replay/models/extensions/ann/index_stores/spark_files_index_store.py +8 -14
- replay/models/extensions/ann/index_stores/utils.py +5 -2
- replay/models/extensions/ann/utils.py +3 -5
- replay/models/kl_ucb.py +16 -22
- replay/models/knn.py +37 -59
- replay/models/nn/optimizer_utils/__init__.py +1 -6
- replay/models/nn/optimizer_utils/optimizer_factory.py +3 -6
- replay/models/nn/sequential/bert4rec/__init__.py +1 -1
- replay/models/nn/sequential/bert4rec/dataset.py +6 -7
- replay/models/nn/sequential/bert4rec/lightning.py +53 -56
- replay/models/nn/sequential/bert4rec/model.py +12 -25
- replay/models/nn/sequential/callbacks/__init__.py +1 -1
- replay/models/nn/sequential/callbacks/prediction_callbacks.py +23 -25
- replay/models/nn/sequential/callbacks/validation_callback.py +27 -30
- replay/models/nn/sequential/postprocessors/postprocessors.py +1 -1
- replay/models/nn/sequential/sasrec/dataset.py +8 -7
- replay/models/nn/sequential/sasrec/lightning.py +53 -48
- replay/models/nn/sequential/sasrec/model.py +4 -17
- replay/models/pop_rec.py +9 -10
- replay/models/query_pop_rec.py +7 -15
- replay/models/random_rec.py +10 -18
- replay/models/slim.py +8 -13
- replay/models/thompson_sampling.py +13 -14
- replay/models/ucb.py +11 -22
- replay/models/wilson.py +5 -14
- replay/models/word2vec.py +24 -69
- replay/optimization/optuna_objective.py +13 -27
- replay/preprocessing/__init__.py +1 -2
- replay/preprocessing/converter.py +2 -7
- replay/preprocessing/filters.py +67 -142
- replay/preprocessing/history_based_fp.py +44 -116
- replay/preprocessing/label_encoder.py +106 -68
- replay/preprocessing/sessionizer.py +1 -11
- replay/scenarios/fallback.py +3 -8
- replay/splitters/base_splitter.py +43 -15
- replay/splitters/cold_user_random_splitter.py +18 -31
- replay/splitters/k_folds.py +14 -24
- replay/splitters/last_n_splitter.py +33 -43
- replay/splitters/new_users_splitter.py +31 -55
- replay/splitters/random_splitter.py +16 -23
- replay/splitters/ratio_splitter.py +30 -54
- replay/splitters/time_splitter.py +13 -18
- replay/splitters/two_stage_splitter.py +44 -79
- replay/utils/__init__.py +1 -1
- replay/utils/common.py +65 -0
- replay/utils/dataframe_bucketizer.py +25 -31
- replay/utils/distributions.py +3 -15
- replay/utils/model_handler.py +36 -33
- replay/utils/session_handler.py +11 -15
- replay/utils/spark_utils.py +51 -85
- replay/utils/time.py +8 -22
- replay/utils/types.py +1 -3
- {replay_rec-0.16.0.dist-info → replay_rec-0.17.0.dist-info}/METADATA +2 -2
- replay_rec-0.17.0.dist-info/RECORD +127 -0
- replay_rec-0.16.0.dist-info/RECORD +0 -126
- {replay_rec-0.16.0.dist-info → replay_rec-0.17.0.dist-info}/LICENSE +0 -0
- {replay_rec-0.16.0.dist-info → replay_rec-0.17.0.dist-info}/WHEEL +0 -0
replay/models/nn/sequential/bert4rec/__init__.py

@@ -6,9 +6,9 @@ if TORCH_AVAILABLE:
         Bert4RecPredictionDataset,
         Bert4RecTrainingBatch,
         Bert4RecTrainingDataset,
+        Bert4RecUniformMasker,
         Bert4RecValidationBatch,
         Bert4RecValidationDataset,
-        Bert4RecUniformMasker,
     )
     from .lightning import Bert4Rec
     from .model import Bert4RecModel
replay/models/nn/sequential/bert4rec/dataset.py

@@ -27,7 +27,6 @@ class Bert4RecTrainingBatch(NamedTuple):
     labels: torch.LongTensor


-# pylint: disable=too-few-public-methods
 class Bert4RecMasker(abc.ABC):
     """
     Interface for a token masking strategy during BERT model training
@@ -44,7 +43,6 @@ class Bert4RecMasker(abc.ABC):
     """


-# pylint: disable=too-few-public-methods
 class Bert4RecUniformMasker(Bert4RecMasker):
     """
     Token masking strategy that mask random token with uniform distribution.
@@ -90,7 +88,6 @@ class Bert4RecTrainingDataset(TorchDataset):
     Dataset that generates samples to train BERT-like model
     """

-    # pylint: disable=too-many-arguments
     def __init__(
         self,
         sequential: SequentialDataset,
@@ -121,13 +118,16 @@ class Bert4RecTrainingDataset(TorchDataset):
         super().__init__()
         if label_feature_name:
             if label_feature_name not in sequential.schema:
-                raise ValueError("Label feature name not found in provided schema")
+                msg = "Label feature name not found in provided schema"
+                raise ValueError(msg)

             if not sequential.schema[label_feature_name].is_cat:
-                raise ValueError("Label feature must be categorical")
+                msg = "Label feature must be categorical"
+                raise ValueError(msg)

             if not sequential.schema[label_feature_name].is_seq:
-                raise ValueError("Label feature must be sequential")
+                msg = "Label feature must be sequential"
+                raise ValueError(msg)

         self._max_sequence_length = max_sequence_length
         self._label_feature_name = label_feature_name or sequential.schema.item_id_feature_name
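The recurring change in this hunk, repeated throughout the release, replaces inline raise ValueError("...") calls with an assign-then-raise form, the style ruff's EM rules enforce so the traceback's source line shows a short raise rather than a long string literal. A minimal sketch of the pattern with illustrative names (not package code):

    def require_feature(schema: dict, name: str) -> None:
        # Bind the message first; the raise line stays short and the message
        # is not repeated verbatim in the traceback's source excerpt.
        if name not in schema:
            msg = f"Label feature '{name}' not found in provided schema"
            raise ValueError(msg)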
@@ -230,7 +230,6 @@ class Bert4RecValidationDataset(TorchDataset):
     Dataset that generates samples to infer and validate BERT-like model
     """

-    # pylint: disable=too-many-arguments
     def __init__(
         self,
         sequential: SequentialDataset,
replay/models/nn/sequential/bert4rec/lightning.py

@@ -1,27 +1,21 @@
 import math
-from typing import Any, Optional, Tuple, Union, cast
+from typing import Any, Dict, Optional, Tuple, Union, cast

-import lightning as L
+import lightning
 import torch

 from replay.data.nn import TensorMap, TensorSchema
 from replay.models.nn.optimizer_utils import FatOptimizerFactory, LRSchedulerFactory, OptimizerFactory
-from .dataset import (
-    Bert4RecPredictionBatch,
-    Bert4RecTrainingBatch,
-    Bert4RecValidationBatch,
-    _shift_features
-)
+
+from .dataset import Bert4RecPredictionBatch, Bert4RecTrainingBatch, Bert4RecValidationBatch, _shift_features
 from .model import Bert4RecModel, CatFeatureEmbedding


-
-class Bert4Rec(L.LightningModule):
+class Bert4Rec(lightning.LightningModule):
     """
     Implements BERT training-validation loop
     """

-    # pylint: disable=too-many-arguments, too-many-locals
     def __init__(
         self,
         tensor_schema: TensorSchema,
@@ -102,8 +96,7 @@ class Bert4Rec(L.LightningModule):
         assert item_count
         self._vocab_size = item_count

-
-    def training_step(self, batch: Bert4RecTrainingBatch, batch_idx: int) -> torch.Tensor:
+    def training_step(self, batch: Bert4RecTrainingBatch, batch_idx: int) -> torch.Tensor:  # noqa: ARG002
         """
         :param batch: Batch of training data.
         :param batch_idx: Batch index.
@@ -129,8 +122,9 @@ class Bert4Rec(L.LightningModule):
         """
         return self._model_predict(feature_tensors, padding_mask, tokens_mask)

-
-    def predict_step(self, batch: Bert4RecPredictionBatch, batch_idx: int, dataloader_idx: int = 0) -> torch.Tensor:
+    def predict_step(
+        self, batch: Bert4RecPredictionBatch, batch_idx: int, dataloader_idx: int = 0  # noqa: ARG002
+    ) -> torch.Tensor:
         """
         :param batch (Bert4RecPredictionBatch): Batch of prediction data.
         :param batch_idx (int): Batch index.
@@ -141,8 +135,9 @@ class Bert4Rec(L.LightningModule):
         batch = self._prepare_prediction_batch(batch)
         return self._model_predict(batch.features, batch.padding_mask, batch.tokens_mask)

-
-    def validation_step(self, batch: Bert4RecValidationBatch, batch_idx: int, dataloader_idx: int = 0) -> torch.Tensor:
+    def validation_step(
+        self, batch: Bert4RecValidationBatch, batch_idx: int, dataloader_idx: int = 0  # noqa: ARG002
+    ) -> torch.Tensor:
         """
         :param batch: Batch of prediction data.
         :param batch_idx: Batch index.
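The step methods keep batch_idx and dataloader_idx because PyTorch Lightning passes them to these hooks whether or not the body reads them; the release swaps the old pylint disables for ruff's # noqa: ARG002 (unused method argument) on exactly those parameters. A hedged sketch of the same convention on a toy module (not the package's class):

    import lightning
    import torch

    class ToyModule(lightning.LightningModule):
        # batch_idx is required by the Lightning hook signature but unused in
        # the body, so it is suppressed with ARG002 rather than removed.
        def training_step(self, batch: torch.Tensor, batch_idx: int) -> torch.Tensor:  # noqa: ARG002
            return batch.float().sum()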
@@ -166,31 +161,28 @@ class Bert4Rec(L.LightningModule):

     def _prepare_prediction_batch(self, batch: Bert4RecPredictionBatch) -> Bert4RecPredictionBatch:
         if batch.padding_mask.shape[1] > self._model.max_len:
-            raise ValueError(
-                f"The length of the submitted sequence \
+            msg = f"The length of the submitted sequence \
                 must not exceed the maximum length of the sequence. \
                 The length of the sequence is given {batch.padding_mask.shape[1]}, \
-                while the maximum length is {self._model.max_len}")
+                while the maximum length is {self._model.max_len}"
+            raise ValueError(msg)
+
         if batch.padding_mask.shape[1] < self._model.max_len:
             query_id, padding_mask, features, _ = batch
             sequence_item_count = padding_mask.shape[1]
             for feature_name, feature_tensor in features.items():
                 if self._schema[feature_name].is_cat:
                     features[feature_name] = torch.nn.functional.pad(
-                        feature_tensor,
-                        (self._model.max_len - sequence_item_count, 0),
-                        value=0
+                        feature_tensor, (self._model.max_len - sequence_item_count, 0), value=0
                     )
                 else:
                     features[feature_name] = torch.nn.functional.pad(
                         feature_tensor.view(feature_tensor.size(0), feature_tensor.size(1)),
                         (self._model.max_len - sequence_item_count, 0),
-                        value=0
+                        value=0,
                     ).unsqueeze(-1)
             padding_mask = torch.nn.functional.pad(
-                padding_mask,
-                (self._model.max_len - sequence_item_count, 0),
-                value=0
+                padding_mask, (self._model.max_len - sequence_item_count, 0), value=0
             )
         shifted_features, shifted_padding_mask, tokens_mask = _shift_features(self._schema, features, padding_mask)
         batch = Bert4RecPredictionBatch(query_id, shifted_padding_mask, shifted_features, tokens_mask)
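_prepare_prediction_batch left-pads every feature and the padding mask up to the model's max_len before shifting. torch.nn.functional.pad takes the amount to add as a (left, right) pair for the last dimension, so (max_len - seq_len, 0) grows sequences from the left. A runnable sketch with toy sizes (names are illustrative):

    import torch

    max_len = 8
    padding_mask = torch.ones(2, 5, dtype=torch.bool)  # two sequences of length 5
    pad_amount = max_len - padding_mask.shape[1]
    # (pad_amount, 0) pads the left of the last dimension with zeros (= padding)
    padded = torch.nn.functional.pad(padding_mask, (pad_amount, 0), value=0)
    assert padded.shape == (2, 8)
    assert not padded[:, :pad_amount].any()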
@@ -213,17 +205,12 @@ class Bert4Rec(L.LightningModule):

     def _compute_loss(self, batch: Bert4RecTrainingBatch) -> torch.Tensor:
         if self._loss_type == "BCE":
-            if self._loss_sample_count is None:
-                loss_func = self._compute_loss_bce
-            else:
-                loss_func = self._compute_loss_bce_sampled
+            loss_func = self._compute_loss_bce if self._loss_sample_count is None else self._compute_loss_bce_sampled
         elif self._loss_type == "CE":
-            if self._loss_sample_count is None:
-                loss_func = self._compute_loss_ce
-            else:
-                loss_func = self._compute_loss_ce_sampled
+            loss_func = self._compute_loss_ce if self._loss_sample_count is None else self._compute_loss_ce_sampled
         else:
-            raise ValueError(f"Not supported loss type: {self._loss_type}")
+            msg = f"Not supported loss type: {self._loss_type}"
+            raise ValueError(msg)

         loss = loss_func(
             batch.features,
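The BCE/CE branches above collapse four-line if/else blocks into conditional expressions that select a bound method; the later loss_func(...) call then invokes whichever was picked. A small standalone sketch of the style, with illustrative names:

    from typing import Optional

    class LossDispatch:
        def _full(self) -> str:
            return "full softmax"

        def _sampled(self) -> str:
            return "sampled softmax"

        def pick(self, sample_count: Optional[int]) -> str:
            # The conditional expression selects the bound method; the call is deferred.
            loss_func = self._full if sample_count is None else self._sampled
            return loss_func()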
@@ -246,8 +233,10 @@ class Bert4Rec(L.LightningModule):

         labels_mask = (~padding_mask) + tokens_mask
         masked_tokens = ~labels_mask
-        # Take only logits which correspond to non-padded tokens
-        # M = non_zero_count(target_padding_mask)
+        """
+        Take only logits which correspond to non-padded tokens
+        M = non_zero_count(target_padding_mask)
+        """
         logits = logits[masked_tokens]  # [M x V]
         labels = positive_labels[masked_tokens]  # [M]
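The selection below the comment block relies on boolean-mask indexing: indexing a [B x S x V] logits tensor with a [B x S] boolean mask keeps only the masked positions and flattens them into [M x V]. A runnable sketch with made-up sizes:

    import torch

    logits = torch.randn(2, 4, 10)                 # [batch, seq, vocab]
    masked_tokens = torch.zeros(2, 4, dtype=torch.bool)
    masked_tokens[0, 1] = True
    masked_tokens[1, 3] = True
    selected = logits[masked_tokens]               # [M x V], M == 2 here
    assert selected.shape == (2, 10)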
@@ -374,7 +363,8 @@ class Bert4Rec(L.LightningModule):
             else:
                 multinomial_sample_distribution = torch.softmax(positive_logits, dim=-1)
         else:
-            raise NotImplementedError(f"Unknown negative sampling strategy: {self._negative_sampling_strategy}")
+            msg = f"Unknown negative sampling strategy: {self._negative_sampling_strategy}"
+            raise NotImplementedError(msg)
         n_negative_samples = min(n_negative_samples, vocab_size)

         if self._negatives_sharing:
@@ -426,7 +416,8 @@ class Bert4Rec(L.LightningModule):
         if self._loss_type == "CE":
             return torch.nn.CrossEntropyLoss()

-        raise NotImplementedError("Not supported loss_type")
+        msg = "Not supported loss_type"
+        raise NotImplementedError(msg)

     def get_all_embeddings(self) -> Dict[str, torch.nn.Embedding]:
         """
@@ -436,21 +427,22 @@ class Bert4Rec(L.LightningModule):

     def set_item_embeddings_by_size(self, new_vocab_size: int):
         """
-
-
+        Keep the current item embeddings and expand vocabulary with new embeddings
+        initialized with xavier_normal_ for new items.

-        :param new_vocab_size: Size of vocabulary with new items.
+        :param new_vocab_size: Size of vocabulary with new items included.
             Must be greater then already fitted.
         """
         if new_vocab_size <= self._vocab_size:
-            raise ValueError("New vocabulary size must be greater then already fitted")
+            msg = "New vocabulary size must be greater then already fitted"
+            raise ValueError(msg)

         item_tensor_feature_info = self._model.schema.item_id_features.item()
         item_tensor_feature_info._set_cardinality(new_vocab_size)

         weights_new = CatFeatureEmbedding(item_tensor_feature_info)
         torch.nn.init.xavier_normal_(weights_new.weight)
-        weights_new.weight.data[:self._vocab_size, :] = self._model.item_embedder.item_embeddings.data
+        weights_new.weight.data[: self._vocab_size, :] = self._model.item_embedder.item_embeddings.data

         self._set_new_item_embedder_to_model(weights_new, new_vocab_size)

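The expansion above keeps trained weights by building a bigger embedding table, re-initializing it, and copying the old rows back over the first _vocab_size positions. A standalone sketch of the same trick (sizes are illustrative):

    import torch

    old_emb = torch.nn.Embedding(100, 64)          # already trained
    new_emb = torch.nn.Embedding(120, 64)          # expanded vocabulary
    torch.nn.init.xavier_normal_(new_emb.weight)   # fresh rows for new items
    with torch.no_grad():
        new_emb.weight[:100] = old_emb.weight      # preserve the learned rows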
@@ -464,15 +456,18 @@ class Bert4Rec(L.LightningModule):
         shape (n, h), where n - number of all items, h - model hidden size.
         """
         if all_item_embeddings.dim() != 2:
-            raise ValueError("Input tensor must have (number of all items, model hidden size) shape")
+            msg = "Input tensor must have (number of all items, model hidden size) shape"
+            raise ValueError(msg)

         new_vocab_size = all_item_embeddings.shape[0]
         if new_vocab_size < self._vocab_size:
-            raise ValueError("New vocabulary size can't be less then already fitted")
+            msg = "New vocabulary size can't be less then already fitted"
+            raise ValueError(msg)

         item_tensor_feature_info = self._model.schema.item_id_features.item()
         if all_item_embeddings.shape[1] != item_tensor_feature_info.embedding_dim:
-            raise ValueError("Input tensor second dimension doesn't match embedding dim")
+            msg = "Input tensor second dimension doesn't match embedding dim"
+            raise ValueError(msg)

         item_tensor_feature_info._set_cardinality(new_vocab_size)

@@ -490,37 +485,39 @@ class Bert4Rec(L.LightningModule):
         n - number of only new items, h - model hidden size.
         """
         if item_embeddings.dim() != 2:
-            raise ValueError("Input tensor must have (number of all items, model hidden size) shape")
+            msg = "Input tensor must have (number of all items, model hidden size) shape"
+            raise ValueError(msg)

         new_vocab_size = item_embeddings.shape[0] + self._vocab_size

         item_tensor_feature_info = self._model.schema.item_id_features.item()
         if item_embeddings.shape[1] != item_tensor_feature_info.embedding_dim:
-            raise ValueError("Input tensor second dimension doesn't match embedding dim")
+            msg = "Input tensor second dimension doesn't match embedding dim"
+            raise ValueError(msg)

         item_tensor_feature_info._set_cardinality(new_vocab_size)

         weights_new = CatFeatureEmbedding(item_tensor_feature_info)
         torch.nn.init.xavier_normal_(weights_new.weight)
-        weights_new.weight.data[:self._vocab_size, :] = self._model.item_embedder.item_embeddings.data
-        weights_new.weight.data[self._vocab_size:, :] = item_embeddings.data
+        weights_new.weight.data[: self._vocab_size, :] = self._model.item_embedder.item_embeddings.data
+        weights_new.weight.data[self._vocab_size :, :] = item_embeddings.data

         self._set_new_item_embedder_to_model(weights_new, new_vocab_size)

     def _set_new_item_embedder_to_model(self, weights_new: torch.nn.Embedding, new_vocab_size: int):
         self._model.item_embedder.cat_embeddings[self._model.schema.item_id_feature_name] = weights_new
-
         if self._model.enable_embedding_tying is True:
             self._model._head._item_embedder = self._model.item_embedder
             new_bias = torch.Tensor(new_vocab_size)
             new_bias.normal_(0, 0.01)
-            new_bias[:self._vocab_size] = self._model._head.out_bias.data
+            new_bias[: self._vocab_size] = self._model._head.out_bias.data
             self._model._head.out_bias = torch.nn.Parameter(new_bias)
         else:
             new_linear = torch.nn.Linear(self._model.hidden_size, new_vocab_size)
-            new_linear.weight.data[:self._vocab_size, :] = self._model._head.linear.weight.data
-            new_linear.bias.data[:self._vocab_size] = self._model._head.linear.bias.data
+            new_linear.weight.data[: self._vocab_size, :] = self._model._head.linear.weight.data
+            new_linear.bias.data[: self._vocab_size] = self._model._head.linear.bias.data
             self._model._head.linear = new_linear

         self._vocab_size = new_vocab_size
         self._model.item_count = new_vocab_size
+        self._schema.item_id_features[self._schema.item_id_feature_name]._set_cardinality(new_vocab_size)
replay/models/nn/sequential/bert4rec/model.py

@@ -1,18 +1,18 @@
+import contextlib
 import math
 from abc import ABC, abstractmethod
-from typing import Optional, Tuple, Union, cast
+from typing import Dict, Optional, Tuple, Union, cast

 import torch

 from replay.data.nn import TensorFeatureInfo, TensorMap, TensorSchema


-# pylint: disable=too-many-instance-attributes
 class Bert4RecModel(torch.nn.Module):
     """
     BERT model
     """
-
+
     def __init__(
         self,
         schema: TensorSchema,
@@ -137,12 +137,7 @@ class Bert4RecModel(torch.nn.Module):
         """
         return self._head(out_embeddings, item_ids)

-    def get_query_embeddings(
-        self,
-        inputs: TensorMap,
-        pad_mask: torch.BoolTensor,
-        token_mask: torch.BoolTensor
-    ):
+    def get_query_embeddings(self, inputs: TensorMap, pad_mask: torch.BoolTensor, token_mask: torch.BoolTensor):
         """
         :param inputs: Batch of features.
         :param pad_mask: Padding mask where 0 - <PAD>, 1 otherwise.
@@ -159,13 +154,10 @@ class Bert4RecModel(torch.nn.Module):

     def _init(self) -> None:
         for _, param in self.named_parameters():
-            try:
+            with contextlib.suppress(ValueError):
                 torch.nn.init.xavier_normal_(param.data)
-            except ValueError:
-                pass


-# pylint: disable=too-many-instance-attributes
 class BertEmbedding(torch.nn.Module):
     """
     BERT Embedding which is consisted with under features
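The _init change swaps a try/except/pass around xavier_normal_ for contextlib.suppress(ValueError): Xavier initialization raises ValueError on tensors with fewer than two dimensions (biases), and both forms simply skip those parameters. An equivalent standalone sketch:

    import contextlib

    import torch

    bias = torch.zeros(10)                      # 1-d, so xavier_normal_ raises ValueError
    with contextlib.suppress(ValueError):
        torch.nn.init.xavier_normal_(bias)      # silently skipped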
@@ -174,7 +166,6 @@ class BertEmbedding(torch.nn.Module):
     sum of all these features are output of BertEmbedding
     """

-    # pylint: disable=too-many-arguments
     def __init__(
         self,
         schema: TensorSchema,
@@ -206,19 +197,18 @@ class BertEmbedding(torch.nn.Module):

         for feature_name, tensor_info in schema.items():
             if not tensor_info.is_seq:
-                raise NotImplementedError("Non-sequential features is not yet supported")
+                msg = "Non-sequential features is not yet supported"
+                raise NotImplementedError(msg)

-            if tensor_info.is_cat:
-                dim = tensor_info.embedding_dim
-            else:
-                dim = tensor_info.tensor_dim
+            dim = tensor_info.embedding_dim if tensor_info.is_cat else tensor_info.tensor_dim

             if aggregation_method == "sum":
                 if common_dim is None:
                     common_dim = dim

                 if dim != common_dim:
-                    raise ValueError("Dimension of all features must be the same for sum aggregation")
+                    msg = "Dimension of all features must be the same for sum aggregation"
+                    raise ValueError(msg)
             else:
                 raise NotImplementedError()

@@ -242,7 +232,7 @@ class BertEmbedding(torch.nn.Module):
         :returns: Embeddings for input features.
         """
         if self.aggregation_method == "sum":
-            aggregated_embedding: torch.Tensor = None
+            aggregated_embedding: torch.Tensor = None

             for feature_name in self.schema.categorical_features:
                 x = inputs[feature_name]
@@ -307,7 +297,7 @@ class BertEmbedding(torch.nn.Module):
         embeddings = {
             "item_embedding": self.item_embeddings.data.detach().clone(),
         }
-        for feature_name
+        for feature_name in self.schema:
             if feature_name != self.schema.item_id_feature_name:
                 embeddings[feature_name] = self.cat_embeddings[feature_name].weight.data.detach().clone()
         if self.enable_positional_embedding:
@@ -335,7 +325,6 @@ class PositionalEmbedding(torch.nn.Module):
     Positional embedding.
     """

-    # pylint: disable=invalid-name
     def __init__(self, max_len: int, d_model: int) -> None:
         """
         :param max_len: Max sequence length.
@@ -477,7 +466,6 @@ class TransformerBlock(torch.nn.Module):

         self.dropout = torch.nn.Dropout(p=dropout)

-    # pylint: disable=invalid-name
     def forward(
         self,
         x: torch.Tensor,
@@ -537,7 +525,6 @@ class MultiHeadedAttention(torch.nn.Module):
     Take in model size and number of heads.
     """

-    # pylint: disable=invalid-name
     def __init__(self, h: int, d_model: int, dropout: float = 0.1) -> None:
         """
         :param h: Head sizes of multi-head attention.
replay/models/nn/sequential/callbacks/__init__.py

@@ -2,8 +2,8 @@ from .prediction_callbacks import (
     BasePredictionCallback,
     PandasPredictionCallback,
     PolarsPredictionCallback,
+    QueryEmbeddingsPredictionCallback,
     SparkPredictionCallback,
     TorchPredictionCallback,
-    QueryEmbeddingsPredictionCallback
 )
 from .validation_callback import ValidationMetricsCallback
replay/models/nn/sequential/callbacks/prediction_callbacks.py

@@ -1,39 +1,37 @@
 import abc
 from typing import Generic, List, Optional, Protocol, Tuple, TypeVar, cast

-import lightning as L
+import lightning
 import torch

 from replay.models.nn.sequential import Bert4Rec
 from replay.models.nn.sequential.postprocessors import BasePostProcessor
-from replay.utils import PYSPARK_AVAILABLE, PandasDataFrame, PolarsDataFrame, SparkDataFrame
+from replay.utils import PYSPARK_AVAILABLE, MissingImportType, PandasDataFrame, PolarsDataFrame, SparkDataFrame

 if PYSPARK_AVAILABLE:  # pragma: no cover
+    import pyspark.sql.functions as sf
     from pyspark.sql import SparkSession
-    import pyspark.sql.functions as F
     from pyspark.sql.types import ArrayType, DoubleType, IntegerType, StructType
 else:
     SparkSession = MissingImportType


-# pylint: disable=too-few-public-methods
 class PredictionBatch(Protocol):
     """
     Prediction callback batch
     """
+
     query_id: torch.LongTensor


 _T = TypeVar("_T")


-
-class BasePredictionCallback(L.Callback, Generic[_T]):
+class BasePredictionCallback(lightning.Callback, Generic[_T]):
     """
     Base callback for prediction stage
     """

-    # pylint: disable=too-many-arguments
     def __init__(
         self,
         top_k: int,
@@ -59,21 +57,21 @@ class BasePredictionCallback(L.Callback, Generic[_T]):
         self._item_batches: List[torch.Tensor] = []
         self._item_scores: List[torch.Tensor] = []

-    # pylint: disable=unused-argument
-    def on_predict_epoch_start(self, trainer: L.Trainer, pl_module: L.LightningModule) -> None:
+    def on_predict_epoch_start(
+        self, trainer: lightning.Trainer, pl_module: lightning.LightningModule  # noqa: ARG002
+    ) -> None:
         self._query_batches.clear()
         self._item_batches.clear()
         self._item_scores.clear()

-    # pylint: disable=unused-argument, too-many-arguments
     def on_predict_batch_end(
         self,
-        trainer: L.Trainer,
-        pl_module: L.LightningModule,
+        trainer: lightning.Trainer,  # noqa: ARG002
+        pl_module: lightning.LightningModule,  # noqa: ARG002
         outputs: torch.Tensor,
         batch: PredictionBatch,
-        batch_idx: int,
-        dataloader_idx: int = 0,
+        batch_idx: int,  # noqa: ARG002
+        dataloader_idx: int = 0,  # noqa: ARG002
     ) -> None:
         query_ids, scores = self._compute_pipeline(batch.query_id, outputs)
         top_scores, top_item_ids = torch.topk(scores, k=self._top_k, dim=1)
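on_predict_batch_end keeps only the top_k recommendations per query with torch.topk, which returns scores and item indices row-wise. A small sketch with dummy scores:

    import torch

    scores = torch.randn(4, 100)                   # [batch, n_items]
    top_scores, top_item_ids = torch.topk(scores, k=10, dim=1)
    assert top_scores.shape == (4, 10)
    assert top_item_ids.shape == (4, 10)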
@@ -157,7 +155,6 @@ class SparkPredictionCallback(BasePredictionCallback[SparkDataFrame]):
     Callback for prediction stage with spark data frame
     """

-    # pylint: disable=too-many-arguments
     def __init__(
         self,
         top_k: int,
@@ -206,7 +203,7 @@ class SparkPredictionCallback(BasePredictionCallback[SparkDataFrame]):
             ),
             schema=schema,
         )
-        .withColumn("exploded_columns", F.explode(F.arrays_zip(self.item_column, self.rating_column)))
+        .withColumn("exploded_columns", sf.explode(sf.arrays_zip(self.item_column, self.rating_column)))
         .select(self.query_column, f"exploded_columns.{self.item_column}", f"exploded_columns.{self.rating_column}")
         )
         return prediction
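Besides the F → sf alias rename, the hunk shows the explode(arrays_zip(...)) idiom: zipping the per-query item and rating arrays into an array of structs and exploding it yields one row per (item, rating) pair. A hedged sketch (column names are illustrative; requires a running SparkSession):

    import pyspark.sql.functions as sf
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    df = spark.createDataFrame([(1, [10, 20], [0.9, 0.7])], ["user_id", "item_id", "rating"])
    flat = (
        df.withColumn("exploded_columns", sf.explode(sf.arrays_zip("item_id", "rating")))
        .select("user_id", "exploded_columns.item_id", "exploded_columns.rating")
    )
    flat.show()  # rows: (1, 10, 0.9) and (1, 20, 0.7)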
@@ -247,26 +244,27 @@ class TorchPredictionCallback(BasePredictionCallback[Tuple[torch.LongTensor, tor
         )


-class QueryEmbeddingsPredictionCallback(L.Callback):
+class QueryEmbeddingsPredictionCallback(lightning.Callback):
     """
     Callback for prediction stage to get query embeddings.
     """
+
     def __init__(self):
         self._embeddings_per_batch: List[torch.Tensor] = []

-    # pylint: disable=unused-argument
-    def on_predict_epoch_start(self, trainer: L.Trainer, pl_module: L.LightningModule) -> None:
+    def on_predict_epoch_start(
+        self, trainer: lightning.Trainer, pl_module: lightning.LightningModule  # noqa: ARG002
+    ) -> None:
         self._embeddings_per_batch.clear()

-    # pylint: disable=unused-argument, too-many-arguments
     def on_predict_batch_end(
         self,
-        trainer: L.Trainer,
-        pl_module: L.LightningModule,
-        outputs: torch.Tensor,
+        trainer: lightning.Trainer,  # noqa: ARG002
+        pl_module: lightning.LightningModule,
+        outputs: torch.Tensor,  # noqa: ARG002
         batch: PredictionBatch,
-        batch_idx: int,
-        dataloader_idx: int = 0,
+        batch_idx: int,  # noqa: ARG002
+        dataloader_idx: int = 0,  # noqa: ARG002
     ) -> None:
         args = [batch.features, batch.padding_mask]
         if isinstance(pl_module, Bert4Rec):