replay-rec 0.18.0-py3-none-any.whl → 0.18.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- replay/__init__.py +1 -1
- replay/data/dataset.py +27 -1
- replay/data/dataset_utils/dataset_label_encoder.py +6 -3
- replay/data/nn/schema.py +37 -16
- replay/data/nn/sequence_tokenizer.py +313 -165
- replay/data/nn/torch_sequential_dataset.py +17 -8
- replay/data/nn/utils.py +14 -7
- replay/data/schema.py +10 -6
- replay/metrics/offline_metrics.py +2 -2
- replay/models/__init__.py +1 -0
- replay/models/base_rec.py +18 -21
- replay/models/lin_ucb.py +407 -0
- replay/models/nn/sequential/bert4rec/dataset.py +17 -4
- replay/models/nn/sequential/bert4rec/lightning.py +121 -54
- replay/models/nn/sequential/bert4rec/model.py +21 -0
- replay/models/nn/sequential/callbacks/prediction_callbacks.py +5 -1
- replay/models/nn/sequential/compiled/__init__.py +5 -0
- replay/models/nn/sequential/compiled/base_compiled_model.py +261 -0
- replay/models/nn/sequential/compiled/bert4rec_compiled.py +152 -0
- replay/models/nn/sequential/compiled/sasrec_compiled.py +145 -0
- replay/models/nn/sequential/postprocessors/postprocessors.py +27 -1
- replay/models/nn/sequential/sasrec/dataset.py +17 -1
- replay/models/nn/sequential/sasrec/lightning.py +126 -50
- replay/models/nn/sequential/sasrec/model.py +3 -4
- replay/preprocessing/__init__.py +7 -1
- replay/preprocessing/discretizer.py +719 -0
- replay/preprocessing/label_encoder.py +384 -52
- replay/splitters/cold_user_random_splitter.py +1 -1
- replay/utils/__init__.py +1 -0
- replay/utils/common.py +7 -8
- replay/utils/session_handler.py +3 -4
- replay/utils/spark_utils.py +15 -1
- replay/utils/types.py +8 -0
- {replay_rec-0.18.0.dist-info → replay_rec-0.18.1.dist-info}/METADATA +73 -60
- {replay_rec-0.18.0.dist-info → replay_rec-0.18.1.dist-info}/RECORD +37 -31
- {replay_rec-0.18.0.dist-info → replay_rec-0.18.1.dist-info}/LICENSE +0 -0
- {replay_rec-0.18.0.dist-info → replay_rec-0.18.1.dist-info}/WHEEL +0 -0
replay/models/nn/sequential/compiled/bert4rec_compiled.py (new file)

```diff
@@ -0,0 +1,152 @@
+import pathlib
+from typing import Optional, Union, get_args
+
+import openvino as ov
+import torch
+
+from replay.data.nn import TensorSchema
+from replay.models.nn.sequential.bert4rec import (
+    Bert4Rec,
+    Bert4RecPredictionBatch,
+)
+from replay.models.nn.sequential.bert4rec.lightning import _prepare_prediction_batch
+from replay.models.nn.sequential.compiled.base_compiled_model import (
+    BaseCompiledModel,
+    OptimizedModeType,
+)
+
+
+class Bert4RecCompiled(BaseCompiledModel):
+    """
+    Bert4Rec CPU-optimized model for inference via OpenVINO.
+    It is recommended to compile model with ``compile`` method and pass ``Bert4Rec`` checkpoint
+    or the model object itself into it.
+    It is also possible to compile model by yourself and pass it to the ``__init__`` with ``TensorSchema``.
+
+    **Note** that compilation requires disk write (and maybe delete) permission.
+    """
+
+    def __init__(
+        self,
+        compiled_model: ov.CompiledModel,
+        schema: TensorSchema,
+    ) -> None:
+        """
+        :param compiled_model: Compiled model.
+        :param schema: Tensor schema of Bert4Rec model.
+        """
+        super().__init__(compiled_model, schema)
+
+    def predict(
+        self,
+        batch: Bert4RecPredictionBatch,
+        candidates_to_score: Optional[torch.LongTensor] = None,
+    ) -> torch.Tensor:
+        """
+        Inference on one batch.
+
+        :param batch: Prediction input.
+        :param candidates_to_score: Item ids to calculate scores.
+            Default: ``None``.
+
+        :return: Tensor with scores.
+        """
+        self._valilade_predict_input(batch, candidates_to_score)
+
+        batch = _prepare_prediction_batch(self._schema, self._max_seq_len, batch)
+        model_inputs = {
+            self._inputs_names[0]: batch.features[self._inputs_names[0]],
+            self._inputs_names[1]: batch.padding_mask,
+            self._inputs_names[2]: batch.tokens_mask,
+        }
+        if self._num_candidates_to_score is not None:
+            self._validate_candidates_to_score(candidates_to_score)
+            model_inputs[self._inputs_names[3]] = candidates_to_score
+        return torch.from_numpy(self._model(model_inputs)[self._output_name])
+
+    @classmethod
+    def compile(
+        cls,
+        model: Union[Bert4Rec, str, pathlib.Path],
+        mode: OptimizedModeType = "one_query",
+        batch_size: Optional[int] = None,
+        num_candidates_to_score: Optional[int] = None,
+        num_threads: Optional[int] = None,
+        onnx_path: Optional[str] = None,
+    ) -> "Bert4RecCompiled":
+        """
+        Model compilation.
+
+        :param model: Path to lightning Bert4Rec model saved in .ckpt format or the Bert4Rec object itself.
+        :param mode: Inference mode, defines shape of inputs.
+            Could be one of [``one_query``, ``batch``, ``dynamic_batch_size``].\n
+            ``one_query`` - sets input shape to [1, max_seq_len]\n
+            ``batch`` - sets input shape to [batch_size, max_seq_len]\n
+            ``dynamic_batch_size`` - sets batch_size to dynamic range [?, max_seq_len]\n
+            Default: ``one_query``.
+        :param batch_size: Batch size, required for ``batch`` mode.
+            Default: ``None``.
+        :param num_candidates_to_score: Number of item ids to calculate scores.
+            Could be one of [``None``, ``-1``, ``N``].\n
+            ``-1`` - sets candidates_to_score shape to dynamic range [1, ?]\n
+            ``N`` - sets candidates_to_score shape to [1, N]\n
+            ``None`` - disables candidates_to_score usage\n
+            Default: ``None``.
+        :param num_threads: Number of CPU threads to use.
+            Must be a natural number or ``None``.
+            If ``None``, then compiler will set this parameter automatically.
+            Default: ``None``.
+        :param onnx_path: Save ONNX model to path, if defined.
+            Default: ``None``.
+        """
+        if mode not in get_args(OptimizedModeType):
+            msg = f"Parameter ``mode`` could be one of {get_args(OptimizedModeType)}."
+            raise ValueError(msg)
+        num_candidates_to_score = Bert4RecCompiled._validate_num_candidates_to_score(num_candidates_to_score)
+        if isinstance(model, Bert4Rec):
+            lightning_model = model.cpu()
+        elif isinstance(model, (str, pathlib.Path)):
+            lightning_model = Bert4Rec.load_from_checkpoint(model, map_location=torch.device("cpu"))
+
+        schema = lightning_model._schema
+        item_seq_name = schema.item_id_feature_name
+        max_seq_len = lightning_model._model.max_len
+
+        batch_size, num_candidates_to_score = Bert4RecCompiled._get_input_params(
+            mode, batch_size, num_candidates_to_score
+        )
+
+        item_sequence = torch.zeros((1, max_seq_len)).long()
+        padding_mask = torch.zeros((1, max_seq_len)).bool()
+        tokens_mask = torch.zeros((1, max_seq_len)).bool()
+
+        model_input_names = [item_seq_name, "padding_mask", "tokens_mask"]
+        model_dynamic_axes_in_input = {
+            item_seq_name: {0: "batch_size", 1: "max_len"},
+            "padding_mask": {0: "batch_size", 1: "max_len"},
+            "tokens_mask": {0: "batch_size", 1: "max_len"},
+        }
+        if num_candidates_to_score:
+            candidates_to_score = torch.zeros((1,)).long()
+            model_input_names += ["candidates_to_score"]
+            model_dynamic_axes_in_input["candidates_to_score"] = {0: "num_candidates_to_score"}
+            model_input_sample = ({item_seq_name: item_sequence}, padding_mask, tokens_mask, candidates_to_score)
+        else:
+            model_input_sample = ({item_seq_name: item_sequence}, padding_mask, tokens_mask)
+
+        # Need to disable "Better Transformer" optimizations that interfere with the compilation process
+        if hasattr(torch.backends, "mha"):
+            torch.backends.mha.set_fastpath_enabled(value=False)
+
+        compiled_model = Bert4RecCompiled._run_model_compilation(
+            lightning_model,
+            model_input_sample,
+            model_input_names,
+            model_dynamic_axes_in_input,
+            batch_size,
+            num_candidates_to_score,
+            num_threads,
+            onnx_path,
+        )
+
+        return cls(compiled_model, schema)
```
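For orientation, here is how the new compiled wrapper is meant to be used. This is a minimal sketch, not code from the package: the checkpoint path is a placeholder, the `from replay.models.nn.sequential.compiled import ...` re-export is assumed from the new `compiled/__init__.py`, and `batch` stands for a `Bert4RecPredictionBatch` produced by the library's prediction dataloader.

```python
import torch

# Assumed re-export via the new replay/models/nn/sequential/compiled/__init__.py
from replay.models.nn.sequential.compiled import Bert4RecCompiled

# Fixed single-query input shape [1, max_seq_len]; scores the full catalog.
one_query_model = Bert4RecCompiled.compile(model="bert4rec.ckpt")  # hypothetical path

# Fixed batch shape [32, max_seq_len] with a dynamically sized candidate list.
batch_model = Bert4RecCompiled.compile(
    model="bert4rec.ckpt",
    mode="batch",
    batch_size=32,
    num_candidates_to_score=-1,  # candidates_to_score input shape becomes [1, ?]
)

# `batch` is a Bert4RecPredictionBatch from a prediction dataloader (placeholder).
candidates = torch.tensor([3, 14, 159], dtype=torch.long)
scores = batch_model.predict(batch, candidates_to_score=candidates)
```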
replay/models/nn/sequential/compiled/sasrec_compiled.py (new file)

```diff
@@ -0,0 +1,145 @@
+import pathlib
+from typing import Optional, Union, get_args
+
+import openvino as ov
+import torch
+
+from replay.data.nn import TensorSchema
+from replay.models.nn.sequential.compiled.base_compiled_model import (
+    BaseCompiledModel,
+    OptimizedModeType,
+)
+from replay.models.nn.sequential.sasrec import (
+    SasRec,
+    SasRecPredictionBatch,
+)
+from replay.models.nn.sequential.sasrec.lightning import _prepare_prediction_batch
+
+
+class SasRecCompiled(BaseCompiledModel):
+    """
+    SasRec CPU-optimized model for inference via OpenVINO.
+    It is recommended to compile model with ``compile`` method and pass ``SasRec`` checkpoint
+    or the model object itself into it.
+    It is also possible to compile model by yourself and pass it to the ``__init__`` with ``TensorSchema``.
+
+    **Note** that compilation requires disk write (and maybe delete) permission.
+    """
+
+    def __init__(
+        self,
+        compiled_model: ov.CompiledModel,
+        schema: TensorSchema,
+    ) -> None:
+        """
+        :param compiled_model: Compiled model.
+        :param schema: Tensor schema of SasRec model.
+        """
+        super().__init__(compiled_model, schema)
+
+    def predict(
+        self,
+        batch: SasRecPredictionBatch,
+        candidates_to_score: Optional[torch.LongTensor] = None,
+    ) -> torch.Tensor:
+        """
+        Inference on one batch.
+
+        :param batch: Prediction input.
+        :param candidates_to_score: Item ids to calculate scores.
+            Default: ``None``.
+
+        :return: Tensor with scores.
+        """
+        self._valilade_predict_input(batch, candidates_to_score)
+
+        batch = _prepare_prediction_batch(self._schema, self._max_seq_len, batch)
+        model_inputs = {
+            self._inputs_names[0]: batch.features[self._inputs_names[0]],
+            self._inputs_names[1]: batch.padding_mask,
+        }
+        if self._num_candidates_to_score is not None:
+            self._validate_candidates_to_score(candidates_to_score)
+            model_inputs[self._inputs_names[2]] = candidates_to_score
+        return torch.from_numpy(self._model(model_inputs)[self._output_name])
+
+    @classmethod
+    def compile(
+        cls,
+        model: Union[SasRec, str, pathlib.Path],
+        mode: OptimizedModeType = "one_query",
+        batch_size: Optional[int] = None,
+        num_candidates_to_score: Optional[int] = None,
+        num_threads: Optional[int] = None,
+        onnx_path: Optional[str] = None,
+    ) -> "SasRecCompiled":
+        """
+        Model compilation.
+
+        :param model: Path to lightning SasRec model saved in .ckpt format or the SasRec object itself.
+        :param mode: Inference mode, defines shape of inputs.
+            Could be one of [``one_query``, ``batch``, ``dynamic_batch_size``].\n
+            ``one_query`` - sets input shape to [1, max_seq_len]\n
+            ``batch`` - sets input shape to [batch_size, max_seq_len]\n
+            ``dynamic_batch_size`` - sets batch_size to dynamic range [?, max_seq_len]\n
+            Default: ``one_query``.
+        :param batch_size: Batch size, required for ``batch`` mode.
+            Default: ``None``.
+        :param num_candidates_to_score: Number of item ids to calculate scores.
+            Could be one of [``None``, ``-1``, ``N``].\n
+            ``-1`` - sets candidates_to_score shape to dynamic range [1, ?]\n
+            ``N`` - sets candidates_to_score shape to [1, N]\n
+            ``None`` - disable candidates_to_score usage\n
+            Default: ``None``.
+        :param num_threads: Number of CPU threads to use.
+            Must be a natural number or ``None``.
+            If ``None``, then compiler will set this parameter automatically.
+            Default: ``None``.
+        :param onnx_path: Save ONNX model to path, if defined.
+            Default: ``None``.
+        """
+        if mode not in get_args(OptimizedModeType):
+            msg = f"Parameter ``mode`` could be one of {get_args(OptimizedModeType)}."
+            raise ValueError(msg)
+        num_candidates_to_score = SasRecCompiled._validate_num_candidates_to_score(num_candidates_to_score)
+        if isinstance(model, SasRec):
+            lightning_model = model.cpu()
+        elif isinstance(model, (str, pathlib.Path)):
+            lightning_model = SasRec.load_from_checkpoint(model, map_location=torch.device("cpu"))
+
+        schema = lightning_model._schema
+        item_seq_name = schema.item_id_feature_name
+        max_seq_len = lightning_model._model.max_len
+
+        batch_size, num_candidates_to_score = SasRecCompiled._get_input_params(
+            mode, batch_size, num_candidates_to_score
+        )
+
+        item_sequence = torch.zeros((1, max_seq_len)).long()
+        padding_mask = torch.zeros((1, max_seq_len)).bool()
+
+        model_input_names = [item_seq_name, "padding_mask"]
+        model_dynamic_axes_in_input = {
+            item_seq_name: {0: "batch_size", 1: "max_len"},
+            "padding_mask": {0: "batch_size", 1: "max_len"},
+        }
+        if num_candidates_to_score:
+            candidates_to_score = torch.zeros((1,)).long()
+            model_input_names += ["candidates_to_score"]
+            model_dynamic_axes_in_input["candidates_to_score"] = {0: "num_candidates_to_score"}
+            model_input_sample = ({item_seq_name: item_sequence}, padding_mask, candidates_to_score)
+        else:
+            model_input_sample = ({item_seq_name: item_sequence}, padding_mask)
+
+        compiled_model = SasRecCompiled._run_model_compilation(
+            lightning_model,
+            model_input_sample,
+            model_input_names,
+            model_dynamic_axes_in_input,
+            batch_size,
+            num_candidates_to_score,
+            num_threads,
+            onnx_path,
+        )
+
+        return cls(compiled_model, schema)
```
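The SasRec wrapper mirrors the Bert4Rec one, minus the `tokens_mask` input. A hedged sketch of the remaining compile options; `trained_sasrec` is a fitted `SasRec` module, `batch` is a `SasRecPredictionBatch`, and the re-export is again assumed:

```python
from replay.models.nn.sequential.compiled import SasRecCompiled  # assumed re-export

# Dynamic batch dimension [?, max_seq_len], pinned to four CPU threads,
# keeping the intermediate ONNX artifact on disk.
compiled = SasRecCompiled.compile(
    model=trained_sasrec,        # or a path to a .ckpt checkpoint (placeholder)
    mode="dynamic_batch_size",
    num_threads=4,
    onnx_path="sasrec.onnx",     # hypothetical output path
)

# Full-catalog scores, one row per query in the batch (placeholder batch).
scores = compiled.predict(batch)
```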
replay/models/nn/sequential/postprocessors/postprocessors.py

```diff
@@ -1,4 +1,4 @@
-from typing import List, Optional, Set, Tuple, cast
+from typing import List, Optional, Set, Tuple, Union, cast
 
 import numpy as np
 import pandas as pd
@@ -17,6 +17,8 @@ class RemoveSeenItems(BasePostProcessor):
     def __init__(self, sequential: SequentialDataset) -> None:
         super().__init__()
         self._sequential = sequential
+        self._apply_candidates = False
+        self._candidates = None
 
     def on_validation(
         self, query_ids: torch.LongTensor, scores: torch.Tensor, ground_truth: torch.LongTensor
@@ -30,6 +32,7 @@ class RemoveSeenItems(BasePostProcessor):
 
         :returns: modified query ids and scores and ground truth dataset
         """
+        self._apply_candidates = False
         modified_scores = self._compute_scores(query_ids, scores)
         return query_ids, modified_scores, ground_truth
 
@@ -42,6 +45,7 @@ class RemoveSeenItems(BasePostProcessor):
 
         :returns: modified query ids and scores
         """
+        self._apply_candidates = True
         modified_scores = self._compute_scores(query_ids, scores)
         return query_ids, modified_scores
 
@@ -56,6 +60,13 @@ class RemoveSeenItems(BasePostProcessor):
         value: float,
     ) -> torch.Tensor:
         flat_item_ids_on_device = flat_item_ids.to(scores.device)
+
+        if self._apply_candidates and self._candidates is not None:
+            item_count = self._sequential.schema.item_id_features.item().cardinality
+            assert item_count
+            _scores = torch.full((scores.shape[0], item_count), -float("inf")).to(scores.device)
+            _scores[:, self._candidates] = torch.reshape(scores, _scores[:, self._candidates].shape)
+            scores = _scores
         if scores.is_contiguous():
             scores.view(-1)[flat_item_ids_on_device] = value
         else:
@@ -80,6 +91,21 @@ class RemoveSeenItems(BasePostProcessor):
         flat_seen_item_ids_np = np.concatenate(item_id_sequences)
         return torch.LongTensor(flat_seen_item_ids_np)
 
+    @property
+    def candidates(self) -> Union[torch.LongTensor, None]:
+        """
+        Returns tensor of item ids to calculate scores.
+        """
+        return self._candidates
+
+    @candidates.setter
+    def candidates(self, candidates: Optional[torch.LongTensor] = None) -> None:
+        """
+        Sets tensor of item ids to calculate scores.
+        :param candidates: Tensor of item ids to calculate scores.
+        """
+        self._candidates = candidates
+
 
 class SampleItems(BasePostProcessor):
     """
```
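The block added to RemoveSeenItems' score-masking helper scatters per-candidate scores into a full catalog-sized tensor (filled with `-inf`) before seen items are masked, so columns keep their global item-id meaning even when the model scored only a candidate subset. A self-contained sketch of that scatter with toy numbers:

```python
import torch

item_count = 10                           # catalog size (schema cardinality)
candidates = torch.tensor([2, 5, 7])      # item ids the model actually scored
scores = torch.tensor([[0.3, 0.9, 0.1]])  # shape [batch_size, num_candidates]

# Expand candidate scores to full item space, -inf everywhere else,
# so seen items can then be masked by their global flat indices.
_scores = torch.full((scores.shape[0], item_count), -float("inf"))
_scores[:, candidates] = torch.reshape(scores, _scores[:, candidates].shape)
# _scores -> [[-inf, -inf, 0.3, -inf, -inf, 0.9, -inf, 0.1, -inf, -inf]]
```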
replay/models/nn/sequential/sasrec/dataset.py

```diff
@@ -10,6 +10,7 @@ from replay.data.nn import (
     TorchSequentialDataset,
     TorchSequentialValidationDataset,
 )
+from replay.utils.model_handler import deprecation_warning
 
 
 class SasRecTrainingBatch(NamedTuple):
@@ -30,6 +31,10 @@ class SasRecTrainingDataset(TorchDataset):
     Dataset that generates samples to train SasRec-like model
     """
 
+    @deprecation_warning(
+        "`padding_value` parameter will be removed in future versions. "
+        "Instead, you should specify `padding_value` for each column in TensorSchema"
+    )
     def __init__(
         self,
         sequential: SequentialDataset,
@@ -90,7 +95,10 @@ class SasRecTrainingDataset(TorchDataset):
 
         output_features: MutableTensorMap = {}
         for feature_name in self._schema:
-            output_features[feature_name] = features[feature_name][: -self._sequence_shift]
+            feature = features[feature_name]
+            if self._schema[feature_name].is_seq:
+                feature = feature[: -self._sequence_shift]
+            output_features[feature_name] = feature
 
         output_features_padding_mask = padding_mask[: -self._sequence_shift]
 
@@ -119,6 +127,10 @@ class SasRecPredictionDataset(TorchDataset):
     Dataset that generates samples to infer SasRec-like model
     """
 
+    @deprecation_warning(
+        "`padding_value` parameter will be removed in future versions. "
+        "Instead, you should specify `padding_value` for each column in TensorSchema"
+    )
     def __init__(
         self,
         sequential: SequentialDataset,
@@ -167,6 +179,10 @@ class SasRecValidationDataset(TorchDataset):
     Dataset that generates samples to infer and validate SasRec-like model
     """
 
+    @deprecation_warning(
+        "`padding_value` parameter will be removed in future versions. "
+        "Instead, you should specify `padding_value` for each column in TensorSchema"
+    )
     def __init__(
         self,
         sequential: SequentialDataset,
```
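The training-dataset change above makes the one-step sequence shift conditional: only features marked `is_seq` in the schema are truncated to form model inputs, while static features now pass through unchanged. A toy illustration of the new loop's semantics (plain tensors and hypothetical feature names, no RePlay types):

```python
import torch

sequence_shift = 1
features = {
    "item_id": torch.tensor([10, 11, 12, 13]),  # sequential feature: shifted
    "user_age": torch.tensor([42]),             # static feature: kept as-is
}
is_seq = {"item_id": True, "user_age": False}   # stand-in for schema[...].is_seq

output_features = {}
for name, feature in features.items():
    if is_seq[name]:
        feature = feature[:-sequence_shift]     # drop the target step
    output_features[name] = feature

assert output_features["item_id"].tolist() == [10, 11, 12]
assert output_features["user_age"].tolist() == [42]
```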
replay/models/nn/sequential/sasrec/lightning.py

```diff
@@ -33,7 +33,7 @@ class SasRec(lightning.LightningModule):
         loss_sample_count: Optional[int] = None,
         negative_sampling_strategy: str = "global_uniform",
         negatives_sharing: bool = False,
-        optimizer_factory: Optional[OptimizerFactory] = None,
+        optimizer_factory: OptimizerFactory = FatOptimizerFactory(),
         lr_scheduler_factory: Optional[LRSchedulerFactory] = None,
     ):
         """
@@ -63,7 +63,7 @@ class SasRec(lightning.LightningModule):
         :param negatives_sharing: Apply negative sharing in calculating sampled logits.
             Default: ``False``.
         :param optimizer_factory: Optimizer factory.
-            Default: ``None``.
+            Default: ``FatOptimizerFactory``.
         :param lr_scheduler_factory: Learning rate schedule factory.
             Default: ``None``.
         """
@@ -92,6 +92,7 @@ class SasRec(lightning.LightningModule):
         item_count = tensor_schema.item_id_features.item().cardinality
         assert item_count
         self._vocab_size = item_count
+        self.candidates_to_score = None
 
     def training_step(self, batch: SasRecTrainingBatch, batch_idx: int) -> torch.Tensor:
         """
@@ -106,30 +107,58 @@ class SasRec(lightning.LightningModule):
         self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, sync_dist=True)
         return loss
 
-    def
+    def predict_step(
+        self,
+        batch: SasRecPredictionBatch,
+        batch_idx: int,  # noqa: ARG002
+        dataloader_idx: int = 0,  # noqa: ARG002
+    ) -> torch.Tensor:
         """
-        :param
-        :param
+        :param batch: Batch of prediction data.
+        :param batch_idx: Batch index.
+        :param dataloader_idx: Dataloader index.
 
         :returns: Calculated scores.
         """
-
+        batch = _prepare_prediction_batch(self._schema, self._model.max_len, batch)
+        return self._model_predict(batch.features, batch.padding_mask)
 
-    def
-        self,
+    def predict(
+        self,
+        batch: SasRecPredictionBatch,
+        candidates_to_score: Optional[torch.LongTensor] = None,
     ) -> torch.Tensor:
         """
         :param batch: Batch of prediction data.
-        :param
-
+        :param candidates_to_score: Item ids to calculate scores.
+            Default: ``None``.
 
         :returns: Calculated scores.
         """
-        batch = self.
-        return self._model_predict(batch.features, batch.padding_mask)
+        batch = _prepare_prediction_batch(self._schema, self._model.max_len, batch)
+        return self._model_predict(batch.features, batch.padding_mask, candidates_to_score)
+
+    def forward(
+        self,
+        feature_tensors: TensorMap,
+        padding_mask: torch.BoolTensor,
+        candidates_to_score: Optional[torch.LongTensor] = None,
+    ) -> torch.Tensor:  # pragma: no cover
+        """
+        :param feature_tensors: Batch of features.
+        :param padding_mask: Padding mask where 0 - <PAD>, 1 otherwise.
+        :param candidates_to_score: Item ids to calculate scores.
+            Default: ``None``.
+
+        :returns: Calculated scores.
+        """
+        return self._model_predict(feature_tensors, padding_mask, candidates_to_score)
 
     def validation_step(
-        self,
+        self,
+        batch: SasRecValidationBatch,
+        batch_idx: int,  # noqa: ARG002
+        dataloader_idx: int = 0,  # noqa: ARG002
     ) -> torch.Tensor:
         """
         :param batch (SasRecValidationBatch): Batch of prediction data.
@@ -143,8 +172,7 @@ class SasRec(lightning.LightningModule):
         """
         :returns: Configured optimizer and lr scheduler.
         """
-
-        optimizer = optimizer_factory.create(self._model.parameters())
+        optimizer = self._optimizer_factory.create(self._model.parameters())
 
         if self._lr_scheduler_factory is None:
             return optimizer
@@ -152,38 +180,16 @@ class SasRec(lightning.LightningModule):
         lr_scheduler = self._lr_scheduler_factory.create(optimizer)
         return [optimizer], [lr_scheduler]
 
-    def _prepare_prediction_batch(self, batch: SasRecPredictionBatch) -> SasRecPredictionBatch:
-
-
-
-
-
-            raise ValueError(msg)
-
-        if batch.padding_mask.shape[1] < self._model.max_len:
-            query_id, padding_mask, features = batch
-            sequence_item_count = padding_mask.shape[1]
-            for feature_name, feature_tensor in features.items():
-                if self._schema[feature_name].is_cat:
-                    features[feature_name] = torch.nn.functional.pad(
-                        feature_tensor, (self._model.max_len - sequence_item_count, 0), value=0
-                    )
-                else:
-                    features[feature_name] = torch.nn.functional.pad(
-                        feature_tensor.view(feature_tensor.size(0), feature_tensor.size(1)),
-                        (self._model.max_len - sequence_item_count, 0),
-                        value=0,
-                    ).unsqueeze(-1)
-            padding_mask = torch.nn.functional.pad(
-                padding_mask, (self._model.max_len - sequence_item_count, 0), value=0
-            )
-            batch = SasRecPredictionBatch(query_id, padding_mask, features)
-        return batch
-
-    def _model_predict(self, feature_tensors: TensorMap, padding_mask: torch.BoolTensor) -> torch.Tensor:
+    def _model_predict(
+        self,
+        feature_tensors: TensorMap,
+        padding_mask: torch.BoolTensor,
+        candidates_to_score: torch.LongTensor = None,
+    ) -> torch.Tensor:
         model: SasRecModel
         model = cast(SasRecModel, self._model.module) if isinstance(self._model, torch.nn.DataParallel) else self._model
-        scores = model.predict(feature_tensors, padding_mask)
+        candidates_to_score = self.candidates_to_score if candidates_to_score is None else candidates_to_score
+        scores = model.predict(feature_tensors, padding_mask, candidates_to_score)
         return scores
 
     def _compute_loss(self, batch: SasRecTrainingBatch) -> torch.Tensor:
@@ -479,6 +485,50 @@ class SasRec(lightning.LightningModule):
 
         self._set_new_item_embedder_to_model(new_embedding, new_vocab_size)
 
+    @property
+    def optimizer_factory(self) -> OptimizerFactory:
+        """
+        Returns current optimizer_factory.
+        """
+        return self._optimizer_factory
+
+    @optimizer_factory.setter
+    def optimizer_factory(self, optimizer_factory: OptimizerFactory) -> None:
+        """
+        Sets new optimizer_factory.
+        :param optimizer_factory: New optimizer factory.
+        """
+        if isinstance(optimizer_factory, OptimizerFactory):
+            self._optimizer_factory = optimizer_factory
+        else:
+            msg = f"Expected optimizer_factory of type OptimizerFactory, got {type(optimizer_factory)}"
+            raise ValueError(msg)
+
+    @property
+    def candidates_to_score(self) -> Union[torch.LongTensor, None]:
+        """
+        Returns tensor of item ids to calculate scores.
+        """
+        return self._candidates_to_score
+
+    @candidates_to_score.setter
+    def candidates_to_score(self, candidates: Optional[torch.LongTensor] = None) -> None:
+        """
+        Sets tensor of item ids to calculate scores.
+        :param candidates: Tensor of item ids to calculate scores.
+        """
+        total_item_count = self._model.item_count
+        if isinstance(candidates, torch.Tensor) and candidates.dtype is torch.long:
+            if 0 < candidates.shape[0] <= total_item_count:
+                self._candidates_to_score = candidates
+            else:
+                msg = f"Expected candidates length to be between 1 and {total_item_count=}"
+                raise ValueError(msg)
+        elif candidates is not None:
+            msg = f"Expected candidates to be of type torch.LongTensor or None, got {type(candidates)}"
+            raise ValueError(msg)
+        self._candidates_to_score = candidates
+
 
     def _set_new_item_embedder_to_model(self, new_embedding: torch.nn.Embedding, new_vocab_size: int):
         self._model.item_embedder.item_emb = new_embedding
@@ -486,11 +536,37 @@ class SasRec(lightning.LightningModule):
         self._model.item_count = new_vocab_size
         self._model.padding_idx = new_vocab_size
         self._model.masking.padding_idx = new_vocab_size
-        self._model.candidates_to_score = torch.tensor(
-            list(range(new_embedding.weight.data.shape[0] - 1)),
-            device=self._model.candidates_to_score.device,
-            dtype=torch.long,
-        )
         self._schema.item_id_features[self._schema.item_id_feature_name]._set_cardinality(
             new_embedding.weight.data.shape[0] - 1
         )
+
+
+def _prepare_prediction_batch(
+    schema: TensorSchema, max_len: int, batch: SasRecPredictionBatch
+) -> SasRecPredictionBatch:
+    if batch.padding_mask.shape[1] > max_len:
+        msg = (
+            "The length of the submitted sequence "
+            "must not exceed the maximum length of the sequence. "
+            f"The length of the sequence is given {batch.padding_mask.shape[1]}, "
+            f"while the maximum length is {max_len}"
+        )
+        raise ValueError(msg)
+
+    if batch.padding_mask.shape[1] < max_len:
+        query_id, padding_mask, features = batch
+        sequence_item_count = padding_mask.shape[1]
+        for feature_name, feature_tensor in features.items():
+            if schema[feature_name].is_cat:
+                features[feature_name] = torch.nn.functional.pad(
+                    feature_tensor, (max_len - sequence_item_count, 0), value=0
+                )
+            else:
+                features[feature_name] = torch.nn.functional.pad(
+                    feature_tensor.view(feature_tensor.size(0), feature_tensor.size(1)),
+                    (max_len - sequence_item_count, 0),
+                    value=0,
+                ).unsqueeze(-1)
+        padding_mask = torch.nn.functional.pad(padding_mask, (max_len - sequence_item_count, 0), value=0)
+        batch = SasRecPredictionBatch(query_id, padding_mask, features)
+    return batch
```
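Taken together, the lightning changes expose candidate scoring in two ways: per call through `predict(batch, candidates_to_score=...)`, and globally through the new `candidates_to_score` property, which `_model_predict` falls back to when no explicit tensor is passed (this is the path `trainer.predict` hits via `predict_step`). A hedged usage sketch; the trained `model`, `trainer`, `batch`, and `prediction_dataloader` are placeholders:

```python
import torch

candidates = torch.tensor([7, 19, 256], dtype=torch.long)

# Per call: only the three candidate items are scored for this batch.
scores = model.predict(batch, candidates_to_score=candidates)

# Globally: every predict_step in trainer.predict uses the same candidates.
model.candidates_to_score = candidates
predictions = trainer.predict(model, dataloaders=prediction_dataloader)

# The setter validates its input: anything but a torch.LongTensor of valid
# length (or None, which restores full-catalog scoring) raises ValueError.
model.candidates_to_score = None
```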