replay-rec 0.20.3-py3-none-any.whl → 0.21.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. replay/__init__.py +1 -1
  2. replay/data/dataset.py +11 -0
  3. replay/data/nn/__init__.py +3 -0
  4. replay/data/nn/parquet/__init__.py +22 -0
  5. replay/data/nn/parquet/collate.py +29 -0
  6. replay/data/nn/parquet/constants/__init__.py +0 -0
  7. replay/data/nn/parquet/constants/batches.py +8 -0
  8. replay/data/nn/parquet/constants/device.py +3 -0
  9. replay/data/nn/parquet/constants/filesystem.py +3 -0
  10. replay/data/nn/parquet/constants/metadata.py +5 -0
  11. replay/data/nn/parquet/fixed_batch_dataset.py +157 -0
  12. replay/data/nn/parquet/impl/__init__.py +0 -0
  13. replay/data/nn/parquet/impl/array_1d_column.py +140 -0
  14. replay/data/nn/parquet/impl/array_2d_column.py +160 -0
  15. replay/data/nn/parquet/impl/column_protocol.py +17 -0
  16. replay/data/nn/parquet/impl/indexing.py +123 -0
  17. replay/data/nn/parquet/impl/masking.py +20 -0
  18. replay/data/nn/parquet/impl/named_columns.py +100 -0
  19. replay/data/nn/parquet/impl/numeric_column.py +110 -0
  20. replay/data/nn/parquet/impl/utils.py +17 -0
  21. replay/data/nn/parquet/info/__init__.py +0 -0
  22. replay/data/nn/parquet/info/distributed_info.py +40 -0
  23. replay/data/nn/parquet/info/partitioning.py +132 -0
  24. replay/data/nn/parquet/info/replicas.py +67 -0
  25. replay/data/nn/parquet/info/worker_info.py +43 -0
  26. replay/data/nn/parquet/iterable_dataset.py +119 -0
  27. replay/data/nn/parquet/iterator.py +61 -0
  28. replay/data/nn/parquet/metadata/__init__.py +19 -0
  29. replay/data/nn/parquet/metadata/metadata.py +116 -0
  30. replay/data/nn/parquet/parquet_dataset.py +176 -0
  31. replay/data/nn/parquet/parquet_module.py +178 -0
  32. replay/data/nn/parquet/partitioned_iterable_dataset.py +56 -0
  33. replay/data/nn/parquet/utils/__init__.py +0 -0
  34. replay/data/nn/parquet/utils/compute_length.py +66 -0
  35. replay/data/nn/schema.py +12 -14
  36. replay/data/nn/sequence_tokenizer.py +5 -0
  37. replay/data/nn/sequential_dataset.py +4 -0
  38. replay/data/nn/torch_sequential_dataset.py +5 -0
  39. replay/data/utils/__init__.py +0 -0
  40. replay/data/utils/batching.py +69 -0
  41. replay/data/utils/typing/__init__.py +0 -0
  42. replay/data/utils/typing/dtype.py +65 -0
  43. replay/metrics/torch_metrics_builder.py +20 -14
  44. replay/models/nn/loss/sce.py +2 -7
  45. replay/models/nn/optimizer_utils/__init__.py +6 -1
  46. replay/models/nn/optimizer_utils/optimizer_factory.py +15 -0
  47. replay/models/nn/sequential/bert4rec/dataset.py +70 -29
  48. replay/models/nn/sequential/bert4rec/lightning.py +97 -36
  49. replay/models/nn/sequential/bert4rec/model.py +11 -11
  50. replay/models/nn/sequential/callbacks/prediction_callbacks.py +50 -8
  51. replay/models/nn/sequential/callbacks/validation_callback.py +23 -6
  52. replay/models/nn/sequential/compiled/base_compiled_model.py +12 -4
  53. replay/models/nn/sequential/compiled/bert4rec_compiled.py +15 -5
  54. replay/models/nn/sequential/compiled/sasrec_compiled.py +16 -7
  55. replay/models/nn/sequential/postprocessors/_base.py +5 -0
  56. replay/models/nn/sequential/postprocessors/postprocessors.py +4 -0
  57. replay/models/nn/sequential/sasrec/dataset.py +81 -26
  58. replay/models/nn/sequential/sasrec/lightning.py +86 -24
  59. replay/models/nn/sequential/sasrec/model.py +14 -9
  60. replay/nn/__init__.py +8 -0
  61. replay/nn/agg.py +109 -0
  62. replay/nn/attention.py +158 -0
  63. replay/nn/embedding.py +283 -0
  64. replay/nn/ffn.py +135 -0
  65. replay/nn/head.py +49 -0
  66. replay/nn/lightning/__init__.py +1 -0
  67. replay/nn/lightning/callback/__init__.py +9 -0
  68. replay/nn/lightning/callback/metrics_callback.py +183 -0
  69. replay/nn/lightning/callback/predictions_callback.py +314 -0
  70. replay/nn/lightning/module.py +123 -0
  71. replay/nn/lightning/optimizer.py +60 -0
  72. replay/nn/lightning/postprocessor/__init__.py +2 -0
  73. replay/nn/lightning/postprocessor/_base.py +51 -0
  74. replay/nn/lightning/postprocessor/seen_items.py +83 -0
  75. replay/nn/lightning/scheduler.py +91 -0
  76. replay/nn/loss/__init__.py +22 -0
  77. replay/nn/loss/base.py +197 -0
  78. replay/nn/loss/bce.py +216 -0
  79. replay/nn/loss/ce.py +317 -0
  80. replay/nn/loss/login_ce.py +373 -0
  81. replay/nn/loss/logout_ce.py +230 -0
  82. replay/nn/mask.py +87 -0
  83. replay/nn/normalization.py +9 -0
  84. replay/nn/output.py +37 -0
  85. replay/nn/sequential/__init__.py +9 -0
  86. replay/nn/sequential/sasrec/__init__.py +7 -0
  87. replay/nn/sequential/sasrec/agg.py +53 -0
  88. replay/nn/sequential/sasrec/diff_transformer.py +125 -0
  89. replay/nn/sequential/sasrec/model.py +377 -0
  90. replay/nn/sequential/sasrec/transformer.py +107 -0
  91. replay/nn/sequential/twotower/__init__.py +2 -0
  92. replay/nn/sequential/twotower/model.py +674 -0
  93. replay/nn/sequential/twotower/reader.py +89 -0
  94. replay/nn/transform/__init__.py +22 -0
  95. replay/nn/transform/copy.py +38 -0
  96. replay/nn/transform/grouping.py +39 -0
  97. replay/nn/transform/negative_sampling.py +182 -0
  98. replay/nn/transform/next_token.py +100 -0
  99. replay/nn/transform/rename.py +33 -0
  100. replay/nn/transform/reshape.py +41 -0
  101. replay/nn/transform/sequence_roll.py +48 -0
  102. replay/nn/transform/template/__init__.py +2 -0
  103. replay/nn/transform/template/sasrec.py +53 -0
  104. replay/nn/transform/template/twotower.py +22 -0
  105. replay/nn/transform/token_mask.py +69 -0
  106. replay/nn/transform/trim.py +51 -0
  107. replay/nn/utils.py +28 -0
  108. replay/preprocessing/filters.py +128 -0
  109. replay/preprocessing/label_encoder.py +36 -33
  110. replay/preprocessing/utils.py +209 -0
  111. replay/splitters/__init__.py +1 -0
  112. replay/splitters/random_next_n_splitter.py +224 -0
  113. replay/utils/common.py +10 -4
  114. {replay_rec-0.20.3.dist-info → replay_rec-0.21.0.dist-info}/METADATA +3 -3
  115. replay_rec-0.21.0.dist-info/RECORD +223 -0
  116. replay_rec-0.20.3.dist-info/RECORD +0 -138
  117. {replay_rec-0.20.3.dist-info → replay_rec-0.21.0.dist-info}/WHEEL +0 -0
  118. {replay_rec-0.20.3.dist-info → replay_rec-0.21.0.dist-info}/licenses/LICENSE +0 -0
  119. {replay_rec-0.20.3.dist-info → replay_rec-0.21.0.dist-info}/licenses/NOTICE +0 -0

replay/models/nn/sequential/bert4rec/dataset.py +70 -29

@@ -17,7 +17,7 @@ from replay.data.nn import (
 class Bert4RecTrainingBatch(NamedTuple):
     """
     Batch of data for training.
-    Generated by `Bert4RecTrainingDataset`.
+    Generated by ``Bert4RecTrainingDataset``.
     """
 
     query_id: torch.LongTensor
@@ -26,6 +26,15 @@ class Bert4RecTrainingBatch(NamedTuple):
     tokens_mask: torch.BoolTensor
     labels: torch.LongTensor
 
+    def convert_to_dict(self) -> dict:
+        return {
+            "query_id": self.query_id,
+            "pad_mask": self.padding_mask,
+            "inputs": self.features,
+            "token_mask": self.tokens_mask,
+            "positive_labels": self.labels,
+        }
+
 
 class Bert4RecMasker(abc.ABC):
     """
@@ -85,7 +94,12 @@ class Bert4RecUniformMasker(Bert4RecMasker):
 
 class Bert4RecTrainingDataset(TorchDataset):
     """
-    Dataset that generates samples to train BERT-like model
+    Dataset that generates samples to train a Bert4Rec model.
+
+    As a result of the dataset iteration, a dictionary is formed.
+    The keys in the dictionary match the names of the arguments in the model's `forward` function.
+    There are also additional keys needed to calculate losses - `positive_labels`.
+    The `query_id` key is required for possible debugging and calling additional lightning callbacks.
     """
 
     def __init__(
@@ -143,26 +157,26 @@ class Bert4RecTrainingDataset(TorchDataset):
     def __len__(self) -> int:
         return len(self._inner)
 
-    def __getitem__(self, index: int) -> Bert4RecTrainingBatch:
+    def __getitem__(self, index: int) -> dict:
         query_id, padding_mask, features = self._inner[index]
         tokens_mask = self._masker.mask(padding_mask)
 
         assert self._label_feature_name
         labels = features[self._label_feature_name]
 
-        return Bert4RecTrainingBatch(
-            query_id=query_id,
-            padding_mask=padding_mask,
-            features=features,
-            tokens_mask=tokens_mask,
-            labels=cast(torch.LongTensor, labels),
-        )
+        return {
+            "query_id": query_id,
+            "pad_mask": padding_mask,
+            "inputs": features,
+            "token_mask": tokens_mask,
+            "positive_labels": labels,
+        }
 
 
 class Bert4RecPredictionBatch(NamedTuple):
     """
     Batch of data for model inference.
-    Generated by `Bert4RecPredictionDataset`.
+    Generated by ``Bert4RecPredictionDataset``.
     """
 
     query_id: torch.LongTensor
@@ -170,10 +184,22 @@ class Bert4RecPredictionBatch(NamedTuple):
     features: TensorMap
     tokens_mask: torch.BoolTensor
 
+    def convert_to_dict(self) -> dict:
+        return {
+            "query_id": self.query_id,
+            "pad_mask": self.padding_mask,
+            "inputs": self.features,
+            "token_mask": self.tokens_mask,
+        }
+
 
 class Bert4RecPredictionDataset(TorchDataset):
     """
-    Dataset that generates samples to infer BERT-like model
+    Dataset that generates samples for Bert4Rec model inference.
+
+    As a result of the dataset iteration, a dictionary is formed.
+    The keys in the dictionary match the names of the arguments in the model's `forward` function.
+    The `query_id` key is required for possible debugging and calling additional lightning callbacks.
     """
 
     def __init__(
@@ -198,23 +224,23 @@ class Bert4RecPredictionDataset(TorchDataset):
     def __len__(self) -> int:
         return len(self._inner)
 
-    def __getitem__(self, index: int) -> Bert4RecPredictionBatch:
+    def __getitem__(self, index: int) -> dict:
         query_id, padding_mask, features = self._inner[index]
 
         shifted_features, shifted_padding_mask, tokens_mask = _shift_features(self._schema, features, padding_mask)
 
-        return Bert4RecPredictionBatch(
-            query_id=query_id,
-            padding_mask=shifted_padding_mask,
-            features=shifted_features,
-            tokens_mask=tokens_mask,
-        )
+        return {
+            "query_id": query_id,
+            "pad_mask": shifted_padding_mask,
+            "inputs": shifted_features,
+            "token_mask": tokens_mask,
+        }
 
 
 class Bert4RecValidationBatch(NamedTuple):
     """
     Batch of data for validation.
-    Generated by `Bert4RecValidationDataset`.
+    Generated by ``Bert4RecValidationDataset``.
     """
 
     query_id: torch.LongTensor
@@ -224,10 +250,25 @@ class Bert4RecValidationBatch(NamedTuple):
     ground_truth: torch.LongTensor
     train: torch.LongTensor
 
+    def convert_to_dict(self) -> dict:
+        return {
+            "query_id": self.query_id,
+            "pad_mask": self.padding_mask,
+            "inputs": self.features,
+            "token_mask": self.tokens_mask,
+            "ground_truth": self.ground_truth,
+            "train": self.train,
+        }
+
 
 class Bert4RecValidationDataset(TorchDataset):
     """
     Dataset that generates samples to infer and validate BERT-like model
+
+    As a result of the dataset iteration, a dictionary is formed.
+    The keys in the dictionary match the names of the arguments in the model's `forward` function.
+    The `query_id` key is required for possible debugging and calling additional lightning callbacks.
+    The `ground_truth` and `train` keys are required for metrics calculation on the validation stage.
     """
 
     def __init__(
@@ -263,19 +304,19 @@ class Bert4RecValidationDataset(TorchDataset):
     def __len__(self) -> int:
         return len(self._inner)
 
-    def __getitem__(self, index: int) -> Bert4RecValidationBatch:
+    def __getitem__(self, index: int) -> dict:
         query_id, padding_mask, features, ground_truth, train = self._inner[index]
 
         shifted_features, shifted_padding_mask, tokens_mask = _shift_features(self._schema, features, padding_mask)
 
-        return Bert4RecValidationBatch(
-            query_id=query_id,
-            padding_mask=shifted_padding_mask,
-            features=shifted_features,
-            tokens_mask=tokens_mask,
-            ground_truth=ground_truth,
-            train=train,
-        )
+        return {
+            "query_id": query_id,
+            "pad_mask": shifted_padding_mask,
+            "inputs": shifted_features,
+            "token_mask": tokens_mask,
+            "ground_truth": ground_truth,
+            "train": train,
+        }
 
 
 def _shift_features(
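
For illustration, a dictionary with the layout that `Bert4RecTrainingDataset.__getitem__` now returns (per the hunks above) can be built by hand. The snippet below is not part of the package; the tensor values and the `item_id` feature name are invented for the example.

```python
import torch

seq_len = 5
item_ids = torch.randint(0, 100, (seq_len,))

# Sample in the new dictionary format; keys mirror Bert4RecModel.forward arguments,
# plus "positive_labels" for the loss and "query_id" for callbacks/debugging.
sample = {
    "query_id": torch.tensor(0),
    "pad_mask": torch.ones(seq_len, dtype=torch.bool),    # 0 - <PAD>, 1 - real token
    "inputs": {"item_id": item_ids},                       # TensorMap of features
    "token_mask": torch.ones(seq_len, dtype=torch.bool),   # 0 - masked position, 1 - otherwise
    "positive_labels": item_ids,                           # labels consumed by the loss
}

# A legacy NamedTuple batch exposes the same layout via the new convert_to_dict() helper.
assert set(sample) == {"query_id", "pad_mask", "inputs", "token_mask", "positive_labels"}
```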
replay/models/nn/sequential/bert4rec/lightning.py +97 -36

@@ -1,4 +1,5 @@
 import math
+import warnings
 from typing import Any, Literal, Optional, Union, cast
 
 import lightning
@@ -29,13 +30,13 @@ class Bert4Rec(lightning.LightningModule):
         enable_embedding_tying: bool = False,
         loss_type: Literal["BCE", "CE", "CE_restricted"] = "CE",
         loss_sample_count: Optional[int] = None,
-        negative_sampling_strategy: str = "global_uniform",
+        negative_sampling_strategy: Literal["global_uniform", "inbatch"] = "global_uniform",
         negatives_sharing: bool = False,
         optimizer_factory: OptimizerFactory = FatOptimizerFactory(),
         lr_scheduler_factory: Optional[LRSchedulerFactory] = None,
     ):
         """
-        :param tensor_schema (TensorSchema): Tensor schema of features.
+        :param tensor_schema: Tensor schema of features.
         :param block_count: Number of Transformer blocks.
             Default: ``2``.
         :param head_count: Number of Attention heads.
@@ -44,7 +45,7 @@ class Bert4Rec(lightning.LightningModule):
             Default: ``256``.
         :param max_seq_len: Max length of sequence.
             Default: ``100``.
-        :param dropout_rate (float): Dropout rate.
+        :param dropout_rate: Dropout rate.
             Default: ``0.1``.
         :param pass_per_transformer_block_count: Number of times to pass data over each Transformer block.
             Default: ``1``.
@@ -54,19 +55,18 @@ class Bert4Rec(lightning.LightningModule):
             If `True` - result scores are calculated by dot product of input and output embeddings,
             if `False` - default linear layer is applied to calculate logits for each item.
             Default: ``False``.
-        :param loss_type: Loss type. Possible values: ``"CE"``, ``"BCE"``, ``"CE_restricted"``.
+        :param loss_type: Loss type.
            Default: ``CE``.
-        :param loss_sample_count (Optional[int]): Sample count to calculate loss.
+        :param loss_sample_count: Sample count to calculate loss.
            Default: ``None``.
         :param negative_sampling_strategy: Negative sampling strategy to calculate loss on sampled negatives.
-            Is used when large count of items in dataset.
-            Possible values: ``"global_uniform"``, ``"inbatch"``
+            Is used when large count of items in dataset.\n
            Default: ``global_uniform``.
-        :param negatives_sharing: Apply negative sharing in calculating sampled logits.
+        :param negatives_sharing: Apply negative sharing in calculating sampled logits.\n
            Default: ``False``.
-        :param optimizer_factory: Optimizer factory.
+        :param optimizer_factory: Optimizer factory.\n
            Default: ``FatOptimizerFactory``.
-        :param lr_scheduler_factory: Learning rate schedule factory.
+        :param lr_scheduler_factory: Learning rate schedule factory.\n
            Default: ``None``.
         """
         super().__init__()
@@ -97,7 +97,7 @@ class Bert4Rec(lightning.LightningModule):
         self._vocab_size = item_count
         self.candidates_to_score = None
 
-    def training_step(self, batch: Bert4RecTrainingBatch, batch_idx: int) -> torch.Tensor:  # noqa: ARG002
+    def training_step(self, batch: Union[Bert4RecTrainingBatch, dict], batch_idx: int) -> torch.Tensor:  # noqa: ARG002
         """
         :param batch: Batch of training data.
         :param batch_idx: Batch index.
@@ -109,7 +109,7 @@ class Bert4Rec(lightning.LightningModule):
         return loss
 
     def predict_step(
-        self, batch: Bert4RecPredictionBatch, batch_idx: int, dataloader_idx: int = 0  # noqa: ARG002
+        self, batch: Union[Bert4RecPredictionBatch, dict], batch_idx: int, dataloader_idx: int = 0  # noqa: ARG002
     ) -> torch.Tensor:
         """
         :param batch (Bert4RecPredictionBatch): Batch of prediction data.
@@ -118,23 +118,49 @@ class Bert4Rec(lightning.LightningModule):
 
         :returns: Calculated scores on prediction batch.
         """
+        if isinstance(batch, Bert4RecPredictionBatch):
+            warnings.warn(
+                "`Bert4RecPredictionBatch` class will be removed in future versions. "
+                "Instead, you should use simple dictionary",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            batch = batch.convert_to_dict()
         batch = _prepare_prediction_batch(self._schema, self._model.max_len, batch)
-        return self._model_predict(batch.features, batch.padding_mask, batch.tokens_mask)
+        return self._model_predict(
+            feature_tensors=batch["inputs"],
+            padding_mask=batch["pad_mask"],
+            tokens_mask=batch["token_mask"],
+        )
 
     def predict(
         self,
-        batch: Bert4RecPredictionBatch,
+        batch: Union[Bert4RecPredictionBatch, dict],
         candidates_to_score: Optional[torch.LongTensor] = None,
     ) -> torch.Tensor:
         """
-        :param batch (Bert4RecPredictionBatch): Batch of prediction data.
+        :param batch: Batch of prediction data.
         :param candidates_to_score: Item ids to calculate scores.
             Default: ``None``.
 
         :returns: Calculated scores on prediction batch.
         """
+        if isinstance(batch, Bert4RecPredictionBatch):
+            warnings.warn(
+                "`Bert4RecPredictionBatch` class will be removed in future versions. "
+                "Instead, you should use simple dictionary",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            batch = batch.convert_to_dict()
+
         batch = _prepare_prediction_batch(self._schema, self._model.max_len, batch)
-        return self._model_predict(batch.features, batch.padding_mask, batch.tokens_mask, candidates_to_score)
+        return self._model_predict(
+            feature_tensors=batch["inputs"],
+            padding_mask=batch["pad_mask"],
+            tokens_mask=batch["token_mask"],
+            candidates_to_score=candidates_to_score,
+        )
 
     def forward(
         self,
@@ -152,10 +178,15 @@ class Bert4Rec(lightning.LightningModule):
 
         :returns: Calculated scores.
         """
-        return self._model_predict(feature_tensors, padding_mask, tokens_mask, candidates_to_score)
+        return self._model_predict(
+            feature_tensors=feature_tensors,
+            padding_mask=padding_mask,
+            tokens_mask=tokens_mask,
+            candidates_to_score=candidates_to_score,
+        )
 
     def validation_step(
-        self, batch: Bert4RecValidationBatch, batch_idx: int, dataloader_idx: int = 0  # noqa: ARG002
+        self, batch: Union[Bert4RecValidationBatch, dict], batch_idx: int, dataloader_idx: int = 0  # noqa: ARG002
     ) -> torch.Tensor:
         """
         :param batch: Batch of prediction data.
@@ -163,7 +194,20 @@ class Bert4Rec(lightning.LightningModule):
 
         :returns: Calculated scores on validation batch.
         """
-        return self._model_predict(batch.features, batch.padding_mask, batch.tokens_mask)
+        if isinstance(batch, Bert4RecValidationBatch):
+            warnings.warn(
+                "`Bert4RecValidationBatch` class will be removed in future versions. "
+                "Instead, you should use simple dictionary",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            batch = batch.convert_to_dict()
+
+        return self._model_predict(
+            feature_tensors=batch["inputs"],
+            padding_mask=batch["pad_mask"],
+            tokens_mask=batch["token_mask"],
+        )
 
     def configure_optimizers(self) -> Any:
         """
@@ -189,10 +233,15 @@ class Bert4Rec(lightning.LightningModule):
            cast(Bert4RecModel, self._model.module) if isinstance(self._model, torch.nn.DataParallel) else self._model
        )
        candidates_to_score = self.candidates_to_score if candidates_to_score is None else candidates_to_score
-        scores = model.predict(feature_tensors, padding_mask, tokens_mask, candidates_to_score)
+        scores = model.predict(
+            inputs=feature_tensors,
+            pad_mask=padding_mask,
+            token_mask=tokens_mask,
+            candidates_to_score=candidates_to_score,
+        )
        return scores
 
-    def _compute_loss(self, batch: Bert4RecTrainingBatch) -> torch.Tensor:
+    def _compute_loss(self, batch: Union[Bert4RecTrainingBatch, dict]) -> torch.Tensor:
        if self._loss_type == "BCE":
            loss_func = self._compute_loss_bce if self._loss_sample_count is None else self._compute_loss_bce_sampled
        elif self._loss_type == "CE":
@@ -203,11 +252,20 @@ class Bert4Rec(lightning.LightningModule):
            msg = f"Not supported loss type: {self._loss_type}"
            raise ValueError(msg)
 
+        if isinstance(batch, Bert4RecTrainingBatch):
+            warnings.warn(
+                "`Bert4RecTrainingBatch` class will be removed in future versions. "
+                "Instead, you should use simple dictionary",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            batch = batch.convert_to_dict()
+
        loss = loss_func(
-            batch.features,
-            batch.labels,
-            batch.padding_mask,  # 0 - padding_idx, 1 - other tokens
-            batch.tokens_mask,  # 0 - masked token, 1 - non-masked token
+            batch["inputs"],
+            batch["positive_labels"],
+            batch["pad_mask"],
+            batch["token_mask"],
        )
 
        return loss
@@ -253,7 +311,7 @@ class Bert4Rec(lightning.LightningModule):
        padding_mask: torch.BoolTensor,
        tokens_mask: torch.BoolTensor,
    ) -> torch.Tensor:
-        (positive_logits, negative_logits, *_) = self._get_sampled_logits(
+        positive_logits, negative_logits, *_ = self._get_sampled_logits(
            feature_tensors, positive_labels, padding_mask, tokens_mask
        )
 
@@ -300,7 +358,7 @@ class Bert4Rec(lightning.LightningModule):
        tokens_mask: torch.BoolTensor,
    ) -> torch.Tensor:
        assert self._loss_sample_count is not None
-        (positive_logits, negative_logits, positive_labels, negative_labels, vocab_size) = self._get_sampled_logits(
+        positive_logits, negative_logits, positive_labels, negative_labels, vocab_size = self._get_sampled_logits(
            feature_tensors, positive_labels, padding_mask, tokens_mask
        )
        n_negative_samples = min(self._loss_sample_count, vocab_size)
@@ -325,7 +383,7 @@ class Bert4Rec(lightning.LightningModule):
        padding_mask: torch.BoolTensor,
        tokens_mask: torch.BoolTensor,
    ) -> torch.Tensor:
-        (logits, labels) = self._get_restricted_logits_for_ce_loss(
+        logits, labels = self._get_restricted_logits_for_ce_loss(
            feature_tensors, positive_labels, padding_mask, tokens_mask
        )
 
@@ -588,20 +646,20 @@ class Bert4Rec(lightning.LightningModule):
        self._schema.item_id_features[self._schema.item_id_feature_name]._set_cardinality(new_vocab_size)
 
 
-def _prepare_prediction_batch(
-    schema: TensorSchema, max_len: int, batch: Bert4RecPredictionBatch
-) -> Bert4RecPredictionBatch:
-    if batch.padding_mask.shape[1] > max_len:
+def _prepare_prediction_batch(schema: TensorSchema, max_len: int, batch: dict) -> dict:
+    seq_len = batch["pad_mask"].shape[1]
+    if seq_len > max_len:
        msg = (
            f"The length of the submitted sequence "
            "must not exceed the maximum length of the sequence. "
-            f"The length of the sequence is given {batch.padding_mask.shape[1]}, "
+            f"The length of the sequence is given {seq_len}, "
            f"while the maximum length is {max_len}"
        )
        raise ValueError(msg)
 
-    if batch.padding_mask.shape[1] < max_len:
-        query_id, padding_mask, features, _ = batch
+    if seq_len < max_len:
+        padding_mask = batch["pad_mask"]
+        features = batch["inputs"].copy()
        sequence_item_count = padding_mask.shape[1]
        for feature_name, feature_tensor in features.items():
            if schema[feature_name].is_cat:
@@ -618,5 +676,8 @@ def _prepare_prediction_batch(
            ).unsqueeze(-1)
        padding_mask = torch.nn.functional.pad(padding_mask, (max_len - sequence_item_count, 0), value=0)
        shifted_features, shifted_padding_mask, tokens_mask = _shift_features(schema, features, padding_mask)
-        batch = Bert4RecPredictionBatch(query_id, shifted_padding_mask, shifted_features, tokens_mask)
+
+        batch["pad_mask"] = shifted_padding_mask
+        batch["inputs"] = shifted_features
+        batch["token_mask"] = tokens_mask
    return batch
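
The lightning.py hunks apply one recurring pattern: each step accepts either the legacy NamedTuple batch or a plain dict, emits a `DeprecationWarning` for the former, and normalizes it with `convert_to_dict()` before indexing the dict keys. The standalone sketch below reproduces that shim with a stand-in NamedTuple instead of the real batch classes; it is illustrative only and not code from the package.

```python
import warnings
from typing import NamedTuple, Union

import torch


class LegacyBatch(NamedTuple):
    # Stand-in for Bert4RecTrainingBatch / Bert4RecPredictionBatch.
    query_id: torch.Tensor
    padding_mask: torch.Tensor
    features: dict
    tokens_mask: torch.Tensor

    def convert_to_dict(self) -> dict:
        return {
            "query_id": self.query_id,
            "pad_mask": self.padding_mask,
            "inputs": self.features,
            "token_mask": self.tokens_mask,
        }


def normalize_batch(batch: Union[LegacyBatch, dict]) -> dict:
    # Same idea as the shim added to training_step/validation_step/predict in the diff.
    if isinstance(batch, LegacyBatch):
        warnings.warn(
            "NamedTuple batches will be removed in future versions; pass a dict instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        batch = batch.convert_to_dict()
    return batch


legacy = LegacyBatch(
    query_id=torch.arange(2),
    padding_mask=torch.ones(2, 4, dtype=torch.bool),
    features={"item_id": torch.zeros(2, 4, dtype=torch.long)},
    tokens_mask=torch.ones(2, 4, dtype=torch.bool),
)
print(sorted(normalize_batch(legacy)))  # warns once, then returns the dict keys
```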
replay/models/nn/sequential/bert4rec/model.py +11 -11

@@ -88,8 +88,8 @@ class Bert4RecModel(torch.nn.Module):
     def forward(self, inputs: TensorMap, pad_mask: torch.BoolTensor, token_mask: torch.BoolTensor) -> torch.Tensor:
         """
         :param inputs: Batch of features.
-        :param pad_mask: Padding mask where 0 - <PAD>, 1 otherwise.
-        :param token_mask: Token mask where 0 - <MASK> tokens, 1 otherwise.
+        :param pad_mask: Padding mask where 0 - ``<PAD>``, 1 - otherwise.
+        :param token_mask: Token mask where 0 - ``<MASK>`` tokens, 1 - otherwise.
 
         :returns: Calculated scores.
         """
@@ -107,12 +107,12 @@ class Bert4RecModel(torch.nn.Module):
     ) -> torch.Tensor:
         """
         :param inputs: Batch of features.
-        :param pad_mask: Padding mask where 0 - <PAD>, 1 otherwise.
-        :param token_mask: Token mask where 0 - <MASK> tokens, 1 otherwise.
-        :param candidates_to_score: Item ids to calculate scores.
-            if `None` predicts for all items
+        :param pad_mask: Padding mask where 0 - ``<PAD>``, 1 - otherwise.
+        :param token_mask: Token mask where 0 - ``<MASK>`` tokens, 1 - otherwise.
+        :param candidates_to_score: Item ids to calculate scores.\n
+            If ``None`` then predicts for all items. Default: ``None``.
 
-        :returns: Calculated scores among canditates_to_score items.
+        :returns: Calculated scores among ``candidates_to_score`` items.
         """
         # final_emb: [B x E]
         final_emb = self.get_query_embeddings(inputs, pad_mask, token_mask)
@@ -123,8 +123,8 @@ class Bert4RecModel(torch.nn.Module):
         """
 
         :param inputs (TensorMap): Batch of features.
-        :param pad_mask (torch.BoolTensor): Padding mask where 0 - <PAD>, 1 otherwise.
-        :param token_mask (torch.BoolTensor): Token mask where 0 - <MASK> tokens, 1 otherwise.
+        :param pad_mask (torch.BoolTensor): Padding mask where 0 - ``<PAD>``, 1 - otherwise.
+        :param token_mask (torch.BoolTensor): Token mask where 0 - ``<MASK>`` tokens, 1 - otherwise.
 
         :returns: Output embeddings.
         """
@@ -158,8 +158,8 @@ class Bert4RecModel(torch.nn.Module):
     def get_query_embeddings(self, inputs: TensorMap, pad_mask: torch.BoolTensor, token_mask: torch.BoolTensor):
         """
         :param inputs: Batch of features.
-        :param pad_mask: Padding mask where 0 - <PAD>, 1 otherwise.
-        :param token_mask: Token mask where 0 - <MASK> tokens, 1 otherwise.
+        :param pad_mask: Padding mask where 0 - ``<PAD>``, 1 - otherwise.
+        :param token_mask: Token mask where 0 - ``<MASK>`` tokens, 1 - otherwise.
 
         :returns: Query embeddings.
         """
replay/models/nn/sequential/callbacks/prediction_callbacks.py +50 -8

@@ -1,8 +1,10 @@
 import abc
-from typing import Generic, Optional, Protocol, TypeVar, cast
+import inspect
+from typing import Generic, Optional, Protocol, TypeVar, Union, cast
 
 import lightning
 import torch
+from typing_extensions import deprecated
 
 from replay.models.nn.sequential import Bert4Rec
 from replay.models.nn.sequential.postprocessors import BasePostProcessor
@@ -16,6 +18,7 @@ else:
     SparkSession = MissingImport
 
 
+@deprecated("`PredictionBatch` class is deprecated.", stacklevel=2)
 class PredictionBatch(Protocol):
     """
     Prediction callback batch
@@ -27,6 +30,10 @@ class PredictionBatch(Protocol):
 _T = TypeVar("_T")
 
 
+@deprecated(
+    "`BasePredictionCallback` class is deprecated. Use `replay.nn.lightning.callback.TopItemsCallbackBase` instead.",
+    stacklevel=2,
+)
 class BasePredictionCallback(lightning.Callback, Generic[_T]):
     """
     Base callback for prediction stage
@@ -48,6 +55,7 @@ class BasePredictionCallback(lightning.Callback, Generic[_T]):
         :param postprocessors: postprocessors to apply.
         """
         super().__init__()
+
         self.query_column = query_column
         self.item_column = item_column
         self.rating_column = rating_column
@@ -74,11 +82,14 @@ class BasePredictionCallback(lightning.Callback, Generic[_T]):
         trainer: lightning.Trainer,  # noqa: ARG002
         pl_module: lightning.LightningModule,  # noqa: ARG002
         outputs: torch.Tensor,
-        batch: PredictionBatch,
+        batch: Union[PredictionBatch, dict],
         batch_idx: int,  # noqa: ARG002
         dataloader_idx: int = 0,  # noqa: ARG002
     ) -> None:
-        query_ids, scores = self._compute_pipeline(batch.query_id, outputs)
+        query_ids, scores = self._compute_pipeline(
+            batch["query_id"] if isinstance(batch, dict) else batch.query_id,
+            outputs,
+        )
         top_scores, top_item_ids = torch.topk(scores, k=self._top_k, dim=1)
         self._query_batches.append(query_ids)
         self._item_batches.append(top_item_ids)
@@ -112,6 +123,10 @@ class BasePredictionCallback(lightning.Callback, Generic[_T]):
         pass
 
 
+@deprecated(
+    "`PandasPredictionCallback` class is deprecated. "
+    "Use `replay.nn.lightning.callback.PandasTopItemsCallback` instead."
+)
 class PandasPredictionCallback(BasePredictionCallback[PandasDataFrame]):
     """
     Callback for predition stage with pandas data frame
@@ -133,6 +148,10 @@ class PandasPredictionCallback(BasePredictionCallback[PandasDataFrame]):
         return prediction.explode([self.item_column, self.rating_column])
 
 
+@deprecated(
+    "`PolarsPredictionCallback` class is deprecated. "
+    "Use `replay.nn.lightning.callback.PolarsTopItemsCallback` instead."
+)
 class PolarsPredictionCallback(BasePredictionCallback[PolarsDataFrame]):
     """
     Callback for predition stage with polars data frame
@@ -154,6 +173,10 @@ class PolarsPredictionCallback(BasePredictionCallback[PolarsDataFrame]):
         return prediction.explode([self.item_column, self.rating_column])
 
 
+@deprecated(
+    "`SparkPredictionCallback` class is deprecated. "
+    "Use `replay.nn.lightning.callback.SparkTopItemsCallback` instead."
+)
 class SparkPredictionCallback(BasePredictionCallback[SparkDataFrame]):
     """
     Callback for prediction stage with spark data frame
@@ -213,6 +236,10 @@ class SparkPredictionCallback(BasePredictionCallback[SparkDataFrame]):
         return prediction
 
 
+@deprecated(
+    "`TorchPredictionCallback` class is deprecated. "
+    "Use `replay.nn.lightning.callback.TorchTopItemsCallback` instead."
+)
 class TorchPredictionCallback(BasePredictionCallback[tuple[torch.LongTensor, torch.LongTensor, torch.Tensor]]):
     """
     Callback for predition stage with tuple of tensors
@@ -248,6 +275,10 @@ class TorchPredictionCallback(BasePredictionCallback[tuple[torch.LongTensor, tor
         )
 
 
+@deprecated(
+    "`QueryEmbeddingsPredictionCallback` class is deprecated. "
+    "Use `replay.nn.lightning.callback.HiddenStatesCallback` instead."
+)
 class QueryEmbeddingsPredictionCallback(lightning.Callback):
     """
     Callback for prediction stage to get query embeddings.
@@ -266,15 +297,26 @@ class QueryEmbeddingsPredictionCallback(lightning.Callback):
         trainer: lightning.Trainer,  # noqa: ARG002
         pl_module: lightning.LightningModule,
         outputs: torch.Tensor,  # noqa: ARG002
-        batch: PredictionBatch,
+        batch: Union[PredictionBatch, dict],
         batch_idx: int,  # noqa: ARG002
         dataloader_idx: int = 0,  # noqa: ARG002
     ) -> None:
-        args = [batch.features, batch.padding_mask]
-        if isinstance(pl_module, Bert4Rec):
-            args.append(batch.tokens_mask)
+        if isinstance(batch, dict):
+            modified_batch = {
+                k: v
+                for k, v in batch.items()
+                if k in inspect.signature(pl_module._model.get_query_embeddings).parameters
+            }
+            query_embeddings = pl_module._model.get_query_embeddings(**modified_batch)
+        else:
+            args = [
+                batch.features,
+                batch.padding_mask,
+            ]
+            if isinstance(pl_module, Bert4Rec):
+                args.append(batch.tokens_mask)
+            query_embeddings = pl_module._model.get_query_embeddings(*args)
 
-        query_embeddings = pl_module._model.get_query_embeddings(*args)
         self._embeddings_per_batch.append(query_embeddings)
 
     def get_result(self):
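
The dict branch added to `QueryEmbeddingsPredictionCallback` filters the batch down to the keyword arguments that `get_query_embeddings` actually accepts. The standalone sketch below reproduces that `inspect.signature` filtering with a made-up function and batch; it is illustrative only and not code from the package.

```python
import inspect

import torch


def get_query_embeddings(inputs: dict, pad_mask: torch.Tensor, token_mask: torch.Tensor) -> torch.Tensor:
    # Stand-in for the model method; simply pools the item ids it is given.
    return inputs["item_id"].float().mean(dim=-1, keepdim=True)


batch = {
    "query_id": torch.arange(2),
    "inputs": {"item_id": torch.randint(0, 10, (2, 5))},
    "pad_mask": torch.ones(2, 5, dtype=torch.bool),
    "token_mask": torch.ones(2, 5, dtype=torch.bool),
    "positive_labels": torch.randint(0, 10, (2, 5)),  # extra key the function does not accept
}

# Keep only the keys that match parameters of the target callable, as the callback does.
accepted = inspect.signature(get_query_embeddings).parameters
filtered = {k: v for k, v in batch.items() if k in accepted}
embeddings = get_query_embeddings(**filtered)
print(sorted(filtered), embeddings.shape)
```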