PyPI - replay-rec - Versions diffs - 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl - Mend

replay-rec 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (119) hide show

replay/__init__.py +1 -1
replay/data/dataset.py +11 -0
replay/data/nn/__init__.py +3 -0
replay/data/nn/parquet/__init__.py +22 -0
replay/data/nn/parquet/collate.py +29 -0
replay/data/nn/parquet/constants/__init__.py +0 -0
replay/data/nn/parquet/constants/batches.py +8 -0
replay/data/nn/parquet/constants/device.py +3 -0
replay/data/nn/parquet/constants/filesystem.py +3 -0
replay/data/nn/parquet/constants/metadata.py +5 -0
replay/data/nn/parquet/fixed_batch_dataset.py +157 -0
replay/data/nn/parquet/impl/__init__.py +0 -0
replay/data/nn/parquet/impl/array_1d_column.py +140 -0
replay/data/nn/parquet/impl/array_2d_column.py +160 -0
replay/data/nn/parquet/impl/column_protocol.py +17 -0
replay/data/nn/parquet/impl/indexing.py +123 -0
replay/data/nn/parquet/impl/masking.py +20 -0
replay/data/nn/parquet/impl/named_columns.py +100 -0
replay/data/nn/parquet/impl/numeric_column.py +110 -0
replay/data/nn/parquet/impl/utils.py +17 -0
replay/data/nn/parquet/info/__init__.py +0 -0
replay/data/nn/parquet/info/distributed_info.py +40 -0
replay/data/nn/parquet/info/partitioning.py +132 -0
replay/data/nn/parquet/info/replicas.py +67 -0
replay/data/nn/parquet/info/worker_info.py +43 -0
replay/data/nn/parquet/iterable_dataset.py +119 -0
replay/data/nn/parquet/iterator.py +61 -0
replay/data/nn/parquet/metadata/__init__.py +19 -0
replay/data/nn/parquet/metadata/metadata.py +116 -0
replay/data/nn/parquet/parquet_dataset.py +176 -0
replay/data/nn/parquet/parquet_module.py +178 -0
replay/data/nn/parquet/partitioned_iterable_dataset.py +56 -0
replay/data/nn/parquet/utils/__init__.py +0 -0
replay/data/nn/parquet/utils/compute_length.py +66 -0
replay/data/nn/schema.py +12 -14
replay/data/nn/sequence_tokenizer.py +5 -0
replay/data/nn/sequential_dataset.py +4 -0
replay/data/nn/torch_sequential_dataset.py +5 -0
replay/data/utils/__init__.py +0 -0
replay/data/utils/batching.py +69 -0
replay/data/utils/typing/__init__.py +0 -0
replay/data/utils/typing/dtype.py +65 -0
replay/metrics/torch_metrics_builder.py +20 -14
replay/models/nn/loss/sce.py +2 -7
replay/models/nn/optimizer_utils/__init__.py +6 -1
replay/models/nn/optimizer_utils/optimizer_factory.py +15 -0
replay/models/nn/sequential/bert4rec/dataset.py +70 -29
replay/models/nn/sequential/bert4rec/lightning.py +97 -36
replay/models/nn/sequential/bert4rec/model.py +11 -11
replay/models/nn/sequential/callbacks/prediction_callbacks.py +50 -8
replay/models/nn/sequential/callbacks/validation_callback.py +23 -6
replay/models/nn/sequential/compiled/base_compiled_model.py +12 -4
replay/models/nn/sequential/compiled/bert4rec_compiled.py +15 -5
replay/models/nn/sequential/compiled/sasrec_compiled.py +16 -7
replay/models/nn/sequential/postprocessors/_base.py +5 -0
replay/models/nn/sequential/postprocessors/postprocessors.py +4 -0
replay/models/nn/sequential/sasrec/dataset.py +81 -26
replay/models/nn/sequential/sasrec/lightning.py +86 -24
replay/models/nn/sequential/sasrec/model.py +14 -9
replay/nn/__init__.py +8 -0
replay/nn/agg.py +109 -0
replay/nn/attention.py +158 -0
replay/nn/embedding.py +283 -0
replay/nn/ffn.py +135 -0
replay/nn/head.py +49 -0
replay/nn/lightning/__init__.py +1 -0
replay/nn/lightning/callback/__init__.py +9 -0
replay/nn/lightning/callback/metrics_callback.py +183 -0
replay/nn/lightning/callback/predictions_callback.py +314 -0
replay/nn/lightning/module.py +123 -0
replay/nn/lightning/optimizer.py +60 -0
replay/nn/lightning/postprocessor/__init__.py +2 -0
replay/nn/lightning/postprocessor/_base.py +51 -0
replay/nn/lightning/postprocessor/seen_items.py +83 -0
replay/nn/lightning/scheduler.py +91 -0
replay/nn/loss/__init__.py +22 -0
replay/nn/loss/base.py +197 -0
replay/nn/loss/bce.py +216 -0
replay/nn/loss/ce.py +317 -0
replay/nn/loss/login_ce.py +373 -0
replay/nn/loss/logout_ce.py +230 -0
replay/nn/mask.py +87 -0
replay/nn/normalization.py +9 -0
replay/nn/output.py +37 -0
replay/nn/sequential/__init__.py +9 -0
replay/nn/sequential/sasrec/__init__.py +7 -0
replay/nn/sequential/sasrec/agg.py +53 -0
replay/nn/sequential/sasrec/diff_transformer.py +125 -0
replay/nn/sequential/sasrec/model.py +377 -0
replay/nn/sequential/sasrec/transformer.py +107 -0
replay/nn/sequential/twotower/__init__.py +2 -0
replay/nn/sequential/twotower/model.py +674 -0
replay/nn/sequential/twotower/reader.py +89 -0
replay/nn/transform/__init__.py +22 -0
replay/nn/transform/copy.py +38 -0
replay/nn/transform/grouping.py +39 -0
replay/nn/transform/negative_sampling.py +182 -0
replay/nn/transform/next_token.py +100 -0
replay/nn/transform/rename.py +33 -0
replay/nn/transform/reshape.py +41 -0
replay/nn/transform/sequence_roll.py +48 -0
replay/nn/transform/template/__init__.py +2 -0
replay/nn/transform/template/sasrec.py +53 -0
replay/nn/transform/template/twotower.py +22 -0
replay/nn/transform/token_mask.py +69 -0
replay/nn/transform/trim.py +51 -0
replay/nn/utils.py +28 -0
replay/preprocessing/filters.py +128 -0
replay/preprocessing/label_encoder.py +36 -33
replay/preprocessing/utils.py +209 -0
replay/splitters/__init__.py +1 -0
replay/splitters/random_next_n_splitter.py +224 -0
replay/utils/common.py +10 -4
{replay_rec-0.20.3.dist-info → replay_rec-0.21.0.dist-info}/METADATA +3 -3
replay_rec-0.21.0.dist-info/RECORD +223 -0
replay_rec-0.20.3.dist-info/RECORD +0 -138
{replay_rec-0.20.3.dist-info → replay_rec-0.21.0.dist-info}/WHEEL +0 -0
{replay_rec-0.20.3.dist-info → replay_rec-0.21.0.dist-info}/licenses/LICENSE +0 -0
{replay_rec-0.20.3.dist-info → replay_rec-0.21.0.dist-info}/licenses/NOTICE +0 -0

replay/models/nn/sequential/callbacks/validation_callback.py CHANGED Viewed

@@ -1,8 +1,9 @@
-from typing import Any, Literal, Optional, Protocol
+from typing import Any, Literal, Optional, Protocol, Union
 import lightning
 import torch
 from lightning.pytorch.utilities.rank_zero import rank_zero_only
+from typing_extensions import deprecated
 from replay.metrics.torch_metrics_builder import TorchMetricsBuilder, metrics_to_df
 from replay.models.nn.sequential.postprocessors import BasePostProcessor
@@ -18,6 +19,7 @@ CallbackMetricName = Literal[
 ]
+@deprecated("`ValidationBatch` class is deprecated.", stacklevel=2)
 class ValidationBatch(Protocol):
     """
     Validation callback batch
@@ -28,12 +30,19 @@ class ValidationBatch(Protocol):
     train: torch.LongTensor
+@deprecated(
+    "`ValidationMetricsCallback` class is deprecated. "
+    "Use `replay.nn.lightning.callback.ComputeMetricsCallback` instead."
+)
 class ValidationMetricsCallback(lightning.Callback):
     """
     Callback for validation and testing stages.
     If multiple validation/testing dataloaders are used,
     the suffix of the metric name will contain the serial number of the dataloader.
+    For the callback to work correctly, the batch must contain the `query_id` and `ground_truth` keys.
+    If you want to calculate the coverage or novelty metrics then the batch must additionally contain the `train` key.
     """
     def __init__(
@@ -95,7 +104,7 @@ class ValidationMetricsCallback(lightning.Callback):
         trainer: lightning.Trainer,
         pl_module: lightning.LightningModule,
         outputs: torch.Tensor,
-        batch: ValidationBatch,
+        batch: Union[ValidationBatch, dict],
         batch_idx: int,
         dataloader_idx: int = 0,
     ) -> None:
@@ -106,7 +115,7 @@ class ValidationMetricsCallback(lightning.Callback):
         trainer: lightning.Trainer,
         pl_module: lightning.LightningModule,
         outputs: torch.Tensor,
-        batch: ValidationBatch,
+        batch: Union[ValidationBatch, dict],
         batch_idx: int,
         dataloader_idx: int = 0,
     ) -> None:  # pragma: no cover
@@ -117,13 +126,21 @@ class ValidationMetricsCallback(lightning.Callback):
         trainer: lightning.Trainer,  # noqa: ARG002
         pl_module: lightning.LightningModule,
         outputs: torch.Tensor,
-        batch: ValidationBatch,
+        batch: Union[ValidationBatch, dict],
         batch_idx: int,
         dataloader_idx: int,
     ) -> None:
-        _, seen_scores, seen_ground_truth = self._compute_pipeline(batch.query_id, outputs, batch.ground_truth)
+        _, seen_scores, seen_ground_truth = self._compute_pipeline(
+            batch["query_id"] if isinstance(batch, dict) else batch.query_id,
+            outputs,
+            batch["ground_truth"] if isinstance(batch, dict) else batch.ground_truth,
+        )
         sampled_items = torch.topk(seen_scores, k=self._metrics_builders[dataloader_idx].max_k, dim=1).indices
-        self._metrics_builders[dataloader_idx].add_prediction(sampled_items, seen_ground_truth, batch.train)
+        self._metrics_builders[dataloader_idx].add_prediction(
+            sampled_items,
+            seen_ground_truth,
+            batch.get("train") if isinstance(batch, dict) else batch.train,
+        )
         if batch_idx + 1 == self._dataloaders_size[dataloader_idx]:
             pl_module.log_dict(

replay/models/nn/sequential/compiled/base_compiled_model.py CHANGED Viewed

@@ -101,17 +101,24 @@ class BaseCompiledModel:
             )
             raise ValueError(msg)
-    def _valilade_predict_input(self, batch: Any, candidates_to_score: Optional[torch.LongTensor] = None) -> None:
+    def _validate_predict_input(
+        self,
+        batch: Any,
+        candidates_to_score: Optional[torch.LongTensor] = None,
+        padding_mask_key_name: str = "padding_mask",
+    ) -> None:
         if self._num_candidates_to_score is None and candidates_to_score is not None:
             msg = (
                 "If ``num_candidates_to_score`` is None, "
                 "it is impossible to infer the model with passed ``candidates_to_score``."
             )
             raise ValueError(msg)
-        if self._batch_size != -1 and batch.padding_mask.shape[0] != self._batch_size:
+        input_batch_size = (
+            batch[padding_mask_key_name].shape[0] if isinstance(batch, dict) else batch.padding_mask.shape[0]
+        )
+        if self._batch_size != -1 and input_batch_size != self._batch_size:
             msg = (
-                f"The batch is smaller then defined batch_size={self._batch_size}. "
+                f"The batch is smaller than defined batch_size={self._batch_size}. "
                 "It is impossible to infer the model with dynamic batch size in ``mode`` = ``batch``. "
                 "Use ``mode`` = ``dynamic_batch_size``."
             )
@@ -215,6 +222,7 @@ class BaseCompiledModel:
             input_names=model_input_names,
             output_names=["scores"],
             dynamic_axes=model_dynamic_axes_in_input,
+            dynamo=False,
         )
         del lightning_model

replay/models/nn/sequential/compiled/bert4rec_compiled.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import pathlib
+import warnings
 from typing import Optional, Union, get_args
 import openvino as ov
@@ -39,7 +40,7 @@ class Bert4RecCompiled(BaseCompiledModel):
     def predict(
         self,
-        batch: Bert4RecPredictionBatch,
+        batch: Union[Bert4RecPredictionBatch, dict],
         candidates_to_score: Optional[torch.LongTensor] = None,
     ) -> torch.Tensor:
         """
@@ -51,13 +52,22 @@ class Bert4RecCompiled(BaseCompiledModel):
         :return: Tensor with scores.
         """
-        self._valilade_predict_input(batch, candidates_to_score)
+        self._validate_predict_input(batch, candidates_to_score, "pad_mask")
+        if isinstance(batch, Bert4RecPredictionBatch):
+            warnings.warn(
+                "`Bert4RecPredictionBatch` class will be removed in future versions. "
+                "Instead, you should use simple dictionary",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            batch = batch.convert_to_dict()
         batch = _prepare_prediction_batch(self._schema, self._max_seq_len, batch)
         model_inputs = {
-            self._inputs_names[0]: batch.features[self._inputs_names[0]],
-            self._inputs_names[1]: batch.padding_mask,
-            self._inputs_names[2]: batch.tokens_mask,
+            self._inputs_names[0]: batch["inputs"][self._inputs_names[0]],
+            self._inputs_names[1]: batch["pad_mask"],
+            self._inputs_names[2]: batch["token_mask"],
         }
         if self._num_candidates_to_score is not None:
             self._validate_candidates_to_score(candidates_to_score)

replay/models/nn/sequential/compiled/sasrec_compiled.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import pathlib
+import warnings
 from typing import Optional, Union, get_args
 import openvino as ov
@@ -39,7 +40,7 @@ class SasRecCompiled(BaseCompiledModel):
     def predict(
         self,
-        batch: SasRecPredictionBatch,
+        batch: Union[SasRecPredictionBatch, dict],
         candidates_to_score: Optional[torch.LongTensor] = None,
     ) -> torch.Tensor:
         """
@@ -51,12 +52,21 @@ class SasRecCompiled(BaseCompiledModel):
         :return: Tensor with scores.
         """
-        self._valilade_predict_input(batch, candidates_to_score)
+        self._validate_predict_input(batch, candidates_to_score)
+        if isinstance(batch, SasRecPredictionBatch):
+            warnings.warn(
+                "`SasRecPredictionBatch` class will be removed in future versions. "
+                "Instead, you should use simple dictionary",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            batch = batch.convert_to_dict()
         batch = _prepare_prediction_batch(self._schema, self._max_seq_len, batch)
         model_inputs = {
-            self._inputs_names[0]: batch.features[self._inputs_names[0]],
-            self._inputs_names[1]: batch.padding_mask,
+            self._inputs_names[0]: batch["feature_tensor"][self._inputs_names[0]],
+            self._inputs_names[1]: batch["padding_mask"],
         }
         if self._num_candidates_to_score is not None:
             self._validate_candidates_to_score(candidates_to_score)
@@ -77,15 +87,14 @@ class SasRecCompiled(BaseCompiledModel):
         Model compilation.
         :param model: Path to lightning SasRec model saved in .ckpt format or the SasRec object itself.
-        :param mode: Inference mode, defines shape of inputs.
-            Could be one of [``one_query``, ``batch``, ``dynamic_batch_size``].\n
+        :param mode: Inference mode, defines shape of inputs.\n
             ``one_query`` - sets input shape to [1, max_seq_len]\n
             ``batch`` - sets input shape to [batch_size, max_seq_len]\n
             ``dynamic_batch_size`` - sets batch_size to dynamic range [?, max_seq_len]\n
             Default: ``one_query``.
         :param batch_size: Batch size, required for ``batch`` mode.
             Default: ``None``.
-        :param num_candidates_to_score: Number of item ids to calculate scores.
+        :param num_candidates_to_score: Number of item ids to calculate scores.\n
             Could be one of [``None``, ``-1``, ``N``].\n
             ``-1`` - sets candidates_to_score shape to dynamic range [1, ?]\n
             ``N`` - sets candidates_to_score shape to [1, N]\n

replay/models/nn/sequential/postprocessors/_base.py CHANGED Viewed

@@ -1,8 +1,13 @@
 import abc
 import torch
+from typing_extensions import deprecated
+@deprecated(
+    "`BasePostProcessor` class is deprecated. Use `replay.nn.lightning.postprocessor.PostprocessorBase` instead.",
+    stacklevel=2,
+)
 class BasePostProcessor(abc.ABC):  # pragma: no cover
     """
     Abstract base class for post processor

replay/models/nn/sequential/postprocessors/postprocessors.py CHANGED Viewed

@@ -3,12 +3,14 @@ from typing import Optional, Union, cast
 import numpy as np
 import pandas as pd
 import torch
+from typing_extensions import deprecated
 from replay.data.nn import SequentialDataset
 from ._base import BasePostProcessor
+@deprecated("`RemoveSeenItems` class is deprecated. Use `replay.nn.lightning.postprocessor.SeenItemsFilter` instead.")
 class RemoveSeenItems(BasePostProcessor):
     """
     Filters out the items that already have been seen in dataset.
@@ -16,6 +18,7 @@ class RemoveSeenItems(BasePostProcessor):
     def __init__(self, sequential: SequentialDataset) -> None:
         super().__init__()
         self._sequential = sequential
         self._apply_candidates = False
         self._candidates = None
@@ -107,6 +110,7 @@ class RemoveSeenItems(BasePostProcessor):
         self._candidates = candidates
+@deprecated("`SampleItems` class is deprecated.")
 class SampleItems(BasePostProcessor):
     """
     Generates negative samples to compute sampled metrics

replay/models/nn/sequential/sasrec/dataset.py CHANGED Viewed

@@ -1,7 +1,8 @@
-from typing import NamedTuple, Optional, cast
+from typing import NamedTuple, Optional
 import torch
 from torch.utils.data import Dataset as TorchDataset
+from typing_extensions import deprecated
 from replay.data.nn import (
     MutableTensorMap,
@@ -12,6 +13,10 @@ from replay.data.nn import (
 )
+@deprecated(
+    "`SasRecTrainingBatch` class is deprecated.",
+    stacklevel=2,
+)
 class SasRecTrainingBatch(NamedTuple):
     """
     Batch of data for training.
@@ -24,10 +29,25 @@ class SasRecTrainingBatch(NamedTuple):
     labels: torch.LongTensor
     labels_padding_mask: torch.BoolTensor
+    def convert_to_dict(self) -> dict:
+        return {
+            "query_id": self.query_id,
+            "feature_tensor": self.features,
+            "padding_mask": self.padding_mask,
+            "positive_labels": self.labels,
+            "target_padding_mask": self.labels_padding_mask,
+        }
+@deprecated("`SasRecTrainingDataset` class is deprecated. Use `replay.data.nn.ParquetModule` instead.")
 class SasRecTrainingDataset(TorchDataset):
     """
-    Dataset that generates samples to train SasRec-like model
+    Dataset that generates samples to train SasRec model.
+    As a result of the dataset iteration, a dictionary is formed.
+    The keys in the dictionary match the names of the arguments in the model's `forward` function.
+    There are also additional keys needed to calculate losses - `positive_labels`, `target_padding_mask`.
+    The `query_id` key is required for possible debugging and calling additional lightning callbacks.
     """
     def __init__(
@@ -81,7 +101,7 @@ class SasRecTrainingDataset(TorchDataset):
     def __len__(self) -> int:
         return len(self._inner)
-    def __getitem__(self, index: int) -> SasRecTrainingBatch:
+    def __getitem__(self, index: int) -> dict:
         query_id, padding_mask, features = self._inner[index]
         assert self._label_feature_name
@@ -97,15 +117,19 @@ class SasRecTrainingDataset(TorchDataset):
         output_features_padding_mask = padding_mask[: -self._sequence_shift]
-        return SasRecTrainingBatch(
-            query_id=query_id,
-            features=output_features,
-            padding_mask=cast(torch.BoolTensor, output_features_padding_mask),
-            labels=cast(torch.LongTensor, labels),
-            labels_padding_mask=cast(torch.BoolTensor, labels_padding_mask),
-        )
+        return {
+            "query_id": query_id,
+            "feature_tensor": output_features,
+            "padding_mask": output_features_padding_mask,
+            "positive_labels": labels,
+            "target_padding_mask": labels_padding_mask,
+        }
+@deprecated(
+    "`SasRecPredictionBatch` class is deprecated.",
+    stacklevel=2,
+)
 class SasRecPredictionBatch(NamedTuple):
     """
     Batch of data for model inference.
@@ -116,10 +140,22 @@ class SasRecPredictionBatch(NamedTuple):
     padding_mask: torch.BoolTensor
     features: TensorMap
+    def convert_to_dict(self) -> dict:
+        return {
+            "query_id": self.query_id,
+            "feature_tensor": self.features,
+            "padding_mask": self.padding_mask,
+        }
+@deprecated("`SasRecPredictionDataset` class is deprecated. Use `replay.data.nn.ParquetModule` instead.")
 class SasRecPredictionDataset(TorchDataset):
     """
-    Dataset that generates samples to infer SasRec-like model
+    Dataset that generates samples to infer SasRec model.
+    As a result of the dataset iteration, a dictionary is formed.
+    The keys in the dictionary match the names of the arguments in the model's `forward` function.
+    The `query_id` key is required for possible debugging and calling additional lightning callbacks.
     """
     def __init__(
@@ -143,15 +179,19 @@ class SasRecPredictionDataset(TorchDataset):
     def __len__(self) -> int:
         return len(self._inner)
-    def __getitem__(self, index: int) -> SasRecPredictionBatch:
+    def __getitem__(self, index: int) -> dict:
         query_id, padding_mask, features = self._inner[index]
-        return SasRecPredictionBatch(
-            query_id=query_id,
-            padding_mask=padding_mask,
-            features=features,
-        )
+        return {
+            "query_id": query_id,
+            "padding_mask": padding_mask,
+            "feature_tensor": features,
+        }
+@deprecated(
+    "`SasRecValidationBatch` class is deprecated.",
+    stacklevel=2,
+)
 class SasRecValidationBatch(NamedTuple):
     """
     Batch of data for validation.
@@ -164,10 +204,25 @@ class SasRecValidationBatch(NamedTuple):
     ground_truth: torch.LongTensor
     train: torch.LongTensor
+    def convert_to_dict(self) -> dict:
+        return {
+            "query_id": self.query_id,
+            "feature_tensor": self.features,
+            "padding_mask": self.padding_mask,
+            "ground_truth": self.ground_truth,
+            "train": self.train,
+        }
+@deprecated("`SasRecValidationDataset` class is deprecated. Use `replay.data.nn.ParquetModule` instead.")
 class SasRecValidationDataset(TorchDataset):
     """
-    Dataset that generates samples to infer and validate SasRec-like model
+    Dataset that generates samples to infer and validate SasRec model.
+    As a result of the dataset iteration, a dictionary is formed.
+    The keys in the dictionary match the names of the arguments in the model's `forward` function.
+    The `query_id` key is required for possible debugging and calling additional lightning callbacks.
+    The `ground_truth` and `train` keys are required for metrics calculation on validation stage.
     """
     def __init__(
@@ -202,12 +257,12 @@ class SasRecValidationDataset(TorchDataset):
     def __len__(self) -> int:
         return len(self._inner)
-    def __getitem__(self, index: int) -> SasRecValidationBatch:
+    def __getitem__(self, index: int) -> dict:
         query_id, padding_mask, features, ground_truth, train = self._inner[index]
-        return SasRecValidationBatch(
-            query_id=query_id,
-            padding_mask=padding_mask,
-            features=features,
-            ground_truth=ground_truth,
-            train=train,
-        )
+        return {
+            "query_id": query_id,
+            "padding_mask": padding_mask,
+            "feature_tensor": features,
+            "ground_truth": ground_truth,
+            "train": train,
+        }

replay/models/nn/sequential/sasrec/lightning.py CHANGED Viewed

@@ -1,8 +1,10 @@
 import math
+import warnings
 from typing import Any, Literal, Optional, Union, cast
 import lightning
 import torch
+from typing_extensions import deprecated
 from replay.data.nn import TensorMap, TensorSchema
 from replay.models.nn.loss import ScalableCrossEntropyLoss, SCEParams
@@ -12,6 +14,11 @@ from .dataset import SasRecPredictionBatch, SasRecTrainingBatch, SasRecValidatio
 from .model import SasRecModel
+@deprecated(
+    "`SasRec` class is deprecated. "
+    "Use `replay.nn.sequential.SasRec` "
+    "and `replay.nn.lightning.LightningModule` instead."
+)
 class SasRec(lightning.LightningModule):
     """
     SASRec Lightning module.
@@ -54,9 +61,9 @@ class SasRec(lightning.LightningModule):
             Default: ``False``.
         :param time_span: Time span value.
             Default: ``256``.
-        :param loss_type: Loss type. Possible values: ``"CE"``, ``"BCE"``, ``"SCE"``.
+        :param loss_type: Loss type.
             Default: ``CE``.
-        :param loss_sample_count (Optional[int]): Sample count to calculate loss.
+        :param loss_sample_count: Sample count to calculate loss.
             Suitable for ``"CE"`` and ``"BCE"`` loss functions.
             Default: ``None``.
         :param negative_sampling_strategy: Negative sampling strategy to calculate loss on sampled negatives.
@@ -74,6 +81,7 @@ class SasRec(lightning.LightningModule):
         """
         super().__init__()
         self.save_hyperparameters()
         self._model = SasRecModel(
             schema=tensor_schema,
             num_blocks=block_count,
@@ -102,7 +110,7 @@ class SasRec(lightning.LightningModule):
         self._vocab_size = item_count
         self.candidates_to_score = None
-    def training_step(self, batch: SasRecTrainingBatch, batch_idx: int) -> torch.Tensor:
+    def training_step(self, batch: Union[SasRecTrainingBatch, dict], batch_idx: int) -> torch.Tensor:
         """
         :param batch (SasRecTrainingBatch): Batch of training data.
         :param batch_idx (int): Batch index.
@@ -117,7 +125,7 @@ class SasRec(lightning.LightningModule):
     def predict_step(
         self,
-        batch: SasRecPredictionBatch,
+        batch: Union[SasRecPredictionBatch, dict],
         batch_idx: int,  # noqa: ARG002
         dataloader_idx: int = 0,  # noqa: ARG002
     ) -> torch.Tensor:
@@ -128,12 +136,23 @@ class SasRec(lightning.LightningModule):
         :returns: Calculated scores.
         """
+        if isinstance(batch, SasRecPredictionBatch):
+            warnings.warn(
+                "`SasRecPredictionBatch` class will be removed in future versions. "
+                "Instead, you should use simple dictionary",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            batch = batch.convert_to_dict()
         batch = _prepare_prediction_batch(self._schema, self._model.max_len, batch)
-        return self._model_predict(batch.features, batch.padding_mask)
+        return self._model_predict(
+            feature_tensors=batch["feature_tensor"],
+            padding_mask=batch["padding_mask"],
+        )
     def predict(
         self,
-        batch: SasRecPredictionBatch,
+        batch: Union[SasRecPredictionBatch, dict],
         candidates_to_score: Optional[torch.LongTensor] = None,
     ) -> torch.Tensor:
         """
@@ -143,8 +162,20 @@ class SasRec(lightning.LightningModule):
         :returns: Calculated scores.
         """
+        if isinstance(batch, SasRecPredictionBatch):
+            warnings.warn(
+                "`SasRecPredictionBatch` class will be removed in future versions. "
+                "Instead, you should use simple dictionary",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            batch = batch.convert_to_dict()
         batch = _prepare_prediction_batch(self._schema, self._model.max_len, batch)
-        return self._model_predict(batch.features, batch.padding_mask, candidates_to_score)
+        return self._model_predict(
+            feature_tensors=batch["feature_tensor"],
+            padding_mask=batch["padding_mask"],
+            candidates_to_score=candidates_to_score,
+        )
     def forward(
         self,
@@ -164,7 +195,7 @@ class SasRec(lightning.LightningModule):
     def validation_step(
         self,
-        batch: SasRecValidationBatch,
+        batch: Union[SasRecValidationBatch, dict],
         batch_idx: int,  # noqa: ARG002
         dataloader_idx: int = 0,  # noqa: ARG002
     ) -> torch.Tensor:
@@ -174,7 +205,19 @@ class SasRec(lightning.LightningModule):
         :returns: Calculated scores.
         """
-        return self._model_predict(batch.features, batch.padding_mask)
+        if isinstance(batch, SasRecValidationBatch):
+            warnings.warn(
+                "`SasRecValidationBatch` class will be removed in future versions. "
+                "Instead, you should use simple dictionary",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            batch = batch.convert_to_dict()
+        return self._model_predict(
+            feature_tensors=batch["feature_tensor"],
+            padding_mask=batch["padding_mask"],
+        )
     def configure_optimizers(self) -> Any:
         """
@@ -197,10 +240,14 @@ class SasRec(lightning.LightningModule):
         model: SasRecModel
         model = cast(SasRecModel, self._model.module) if isinstance(self._model, torch.nn.DataParallel) else self._model
         candidates_to_score = self.candidates_to_score if candidates_to_score is None else candidates_to_score
-        scores = model.predict(feature_tensors, padding_mask, candidates_to_score)
+        scores = model.predict(
+            feature_tensor=feature_tensors,
+            padding_mask=padding_mask,
+            candidates_to_score=candidates_to_score,
+        )
         return scores
-    def _compute_loss(self, batch: SasRecTrainingBatch) -> torch.Tensor:
+    def _compute_loss(self, batch: Union[SasRecTrainingBatch, dict]) -> torch.Tensor:
         if self._loss_type == "BCE":
             loss_func = self._compute_loss_bce if self._loss_sample_count is None else self._compute_loss_bce_sampled
         elif self._loss_type == "CE":
@@ -211,11 +258,20 @@ class SasRec(lightning.LightningModule):
             msg = f"Not supported loss type: {self._loss_type}"
             raise ValueError(msg)
+        if isinstance(batch, SasRecTrainingBatch):
+            warnings.warn(
+                "`SasRecTrainingBatch` class will be removed in future versions. "
+                "Instead, you should use simple dictionary",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            batch = batch.convert_to_dict()
         loss = loss_func(
-            batch.features,
-            batch.labels,
-            batch.padding_mask,
-            batch.labels_padding_mask,
+            batch["feature_tensor"],
+            batch["positive_labels"],
+            batch["padding_mask"],
+            batch["target_padding_mask"],
         )
         return loss
@@ -258,7 +314,7 @@ class SasRec(lightning.LightningModule):
         padding_mask: torch.BoolTensor,
         target_padding_mask: torch.BoolTensor,
     ) -> torch.Tensor:
-        (positive_logits, negative_logits, *_) = self._get_sampled_logits(
+        positive_logits, negative_logits, *_ = self._get_sampled_logits(
             feature_tensors, positive_labels, padding_mask, target_padding_mask
         )
@@ -306,7 +362,7 @@ class SasRec(lightning.LightningModule):
         target_padding_mask: torch.BoolTensor,
     ) -> torch.Tensor:
         assert self._loss_sample_count is not None
-        (positive_logits, negative_logits, positive_labels, negative_labels, vocab_size) = self._get_sampled_logits(
+        positive_logits, negative_logits, positive_labels, negative_labels, vocab_size = self._get_sampled_logits(
             feature_tensors, positive_labels, padding_mask, target_padding_mask
         )
         n_negative_samples = min(self._loss_sample_count, vocab_size)
@@ -566,19 +622,23 @@ class SasRec(lightning.LightningModule):
 def _prepare_prediction_batch(
-    schema: TensorSchema, max_len: int, batch: SasRecPredictionBatch
-) -> SasRecPredictionBatch:
-    if batch.padding_mask.shape[1] > max_len:
+    schema: TensorSchema,
+    max_len: int,
+    batch: dict,
+) -> dict:
+    seq_len = batch["padding_mask"].shape[1]
+    if seq_len > max_len:
         msg = (
             "The length of the submitted sequence "
             "must not exceed the maximum length of the sequence. "
-            f"The length of the sequence is given {batch.padding_mask.shape[1]}, "
+            f"The length of the sequence is given {seq_len}, "
             f"while the maximum length is {max_len}"
         )
         raise ValueError(msg)
-    if batch.padding_mask.shape[1] < max_len:
-        query_id, padding_mask, features = batch
+    if seq_len < max_len:
+        padding_mask = batch["padding_mask"]
+        features = batch["feature_tensor"].copy()
         sequence_item_count = padding_mask.shape[1]
         for feature_name, feature_tensor in features.items():
             if schema[feature_name].is_cat:
@@ -592,5 +652,7 @@ def _prepare_prediction_batch(
                     value=0,
                 ).unsqueeze(-1)
         padding_mask = torch.nn.functional.pad(padding_mask, (max_len - sequence_item_count, 0), value=0)
-        batch = SasRecPredictionBatch(query_id, padding_mask, features)
+        batch["padding_mask"] = padding_mask
+        batch["feature_tensor"] = features
     return batch

replay-rec 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl

replay-rec 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl