PyPI - replay-rec - Versions diffs - 0.18.0rc0__py3-none-any.whl → 0.18.1__py3-none-any.whl - Mend

replay-rec 0.18.0rc0__py3-none-any.whl → 0.18.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

replay/__init__.py +1 -1
replay/data/dataset.py +27 -1
replay/data/dataset_utils/dataset_label_encoder.py +6 -3
replay/data/nn/schema.py +37 -16
replay/data/nn/sequence_tokenizer.py +313 -165
replay/data/nn/torch_sequential_dataset.py +17 -8
replay/data/nn/utils.py +14 -7
replay/data/schema.py +10 -6
replay/metrics/offline_metrics.py +2 -2
replay/models/__init__.py +1 -0
replay/models/base_rec.py +18 -21
replay/models/lin_ucb.py +407 -0
replay/models/nn/sequential/bert4rec/dataset.py +17 -4
replay/models/nn/sequential/bert4rec/lightning.py +121 -54
replay/models/nn/sequential/bert4rec/model.py +21 -0
replay/models/nn/sequential/callbacks/prediction_callbacks.py +5 -1
replay/models/nn/sequential/compiled/__init__.py +5 -0
replay/models/nn/sequential/compiled/base_compiled_model.py +261 -0
replay/models/nn/sequential/compiled/bert4rec_compiled.py +152 -0
replay/models/nn/sequential/compiled/sasrec_compiled.py +145 -0
replay/models/nn/sequential/postprocessors/postprocessors.py +27 -1
replay/models/nn/sequential/sasrec/dataset.py +17 -1
replay/models/nn/sequential/sasrec/lightning.py +126 -50
replay/models/nn/sequential/sasrec/model.py +3 -4
replay/preprocessing/__init__.py +7 -1
replay/preprocessing/discretizer.py +719 -0
replay/preprocessing/label_encoder.py +384 -52
replay/splitters/cold_user_random_splitter.py +1 -1
replay/utils/__init__.py +1 -0
replay/utils/common.py +7 -8
replay/utils/session_handler.py +3 -4
replay/utils/spark_utils.py +15 -1
replay/utils/types.py +8 -0
{replay_rec-0.18.0rc0.dist-info → replay_rec-0.18.1.dist-info}/METADATA +75 -70
{replay_rec-0.18.0rc0.dist-info → replay_rec-0.18.1.dist-info}/RECORD +37 -84
{replay_rec-0.18.0rc0.dist-info → replay_rec-0.18.1.dist-info}/WHEEL +1 -1
replay/experimental/__init__.py +0 -0
replay/experimental/metrics/__init__.py +0 -62
replay/experimental/metrics/base_metric.py +0 -602
replay/experimental/metrics/coverage.py +0 -97
replay/experimental/metrics/experiment.py +0 -175
replay/experimental/metrics/hitrate.py +0 -26
replay/experimental/metrics/map.py +0 -30
replay/experimental/metrics/mrr.py +0 -18
replay/experimental/metrics/ncis_precision.py +0 -31
replay/experimental/metrics/ndcg.py +0 -49
replay/experimental/metrics/precision.py +0 -22
replay/experimental/metrics/recall.py +0 -25
replay/experimental/metrics/rocauc.py +0 -49
replay/experimental/metrics/surprisal.py +0 -90
replay/experimental/metrics/unexpectedness.py +0 -76
replay/experimental/models/__init__.py +0 -10
replay/experimental/models/admm_slim.py +0 -205
replay/experimental/models/base_neighbour_rec.py +0 -204
replay/experimental/models/base_rec.py +0 -1271
replay/experimental/models/base_torch_rec.py +0 -234
replay/experimental/models/cql.py +0 -454
replay/experimental/models/ddpg.py +0 -923
replay/experimental/models/dt4rec/__init__.py +0 -0
replay/experimental/models/dt4rec/dt4rec.py +0 -189
replay/experimental/models/dt4rec/gpt1.py +0 -401
replay/experimental/models/dt4rec/trainer.py +0 -127
replay/experimental/models/dt4rec/utils.py +0 -265
replay/experimental/models/extensions/spark_custom_models/__init__.py +0 -0
replay/experimental/models/extensions/spark_custom_models/als_extension.py +0 -792
replay/experimental/models/implicit_wrap.py +0 -131
replay/experimental/models/lightfm_wrap.py +0 -302
replay/experimental/models/mult_vae.py +0 -332
replay/experimental/models/neuromf.py +0 -406
replay/experimental/models/scala_als.py +0 -296
replay/experimental/nn/data/__init__.py +0 -1
replay/experimental/nn/data/schema_builder.py +0 -55
replay/experimental/preprocessing/__init__.py +0 -3
replay/experimental/preprocessing/data_preparator.py +0 -839
replay/experimental/preprocessing/padder.py +0 -229
replay/experimental/preprocessing/sequence_generator.py +0 -208
replay/experimental/scenarios/__init__.py +0 -1
replay/experimental/scenarios/obp_wrapper/__init__.py +0 -8
replay/experimental/scenarios/obp_wrapper/obp_optuna_objective.py +0 -74
replay/experimental/scenarios/obp_wrapper/replay_offline.py +0 -248
replay/experimental/scenarios/obp_wrapper/utils.py +0 -87
replay/experimental/scenarios/two_stages/__init__.py +0 -0
replay/experimental/scenarios/two_stages/reranker.py +0 -117
replay/experimental/scenarios/two_stages/two_stages_scenario.py +0 -757
replay/experimental/utils/__init__.py +0 -0
replay/experimental/utils/logger.py +0 -24
replay/experimental/utils/model_handler.py +0 -186
replay/experimental/utils/session_handler.py +0 -44
replay_rec-0.18.0rc0.dist-info/NOTICE +0 -41
{replay_rec-0.18.0rc0.dist-info → replay_rec-0.18.1.dist-info}/LICENSE +0 -0

replay/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """ RecSys library """
-__version__ = "0.18.0.preview"
+__version__ = "0.18.1"

replay/data/dataset.py CHANGED Viewed

@@ -458,13 +458,23 @@ class Dataset:
             if feature.feature_hint in [FeatureHint.ITEM_ID, FeatureHint.QUERY_ID]:
                 return nunique(self._ids_feature_map[feature.feature_hint], column)
             assert feature.feature_source
+            if feature.feature_type == FeatureType.CATEGORICAL_LIST:
+                if self.is_spark:
+                    data = (
+                        self._feature_source_map[feature.feature_source]
+                        .select(column)
+                        .withColumn(column, sf.explode(column))
+                    )
+                else:
+                    data = self._feature_source_map[feature.feature_source][[column]].explode(column)
+                return nunique(data, column)
             return nunique(self._feature_source_map[feature.feature_source], column)
         return callback
     def _set_cardinality(self, features_list: Sequence[FeatureInfo]) -> None:
         for feature in features_list:
-            if feature.feature_type == FeatureType.CATEGORICAL:
+            if feature.feature_type in [FeatureType.CATEGORICAL, FeatureType.CATEGORICAL_LIST]:
                 feature._set_cardinality_callback(self._get_cardinality(feature))
     def _fill_feature_schema(self, feature_schema: FeatureSchema) -> FeatureSchema:
@@ -581,6 +591,7 @@ class Dataset:
         data: DataFrameLike,
         column: str,
         source: FeatureSource,
+        feature_type: FeatureType,
         cardinality: Optional[int],
     ) -> None:
         """
@@ -593,6 +604,16 @@ class Dataset:
         Option: Keep this criterion, but suggest the user to disable the check if he understands
         that the criterion will not pass.
         """
+        if feature_type == FeatureType.CATEGORICAL_LIST:  # explode column if list
+            data = data.withColumn(column, sf.explode(column)) if self.is_spark else data[[column]].explode(column)
+            if self.is_pandas:
+                try:
+                    data[column] = data[column].astype(int)
+                except Exception:
+                    msg = f"IDs in {source.name}.{column} are not encoded. They are not int."
+                    raise ValueError(msg)
         if self.is_pandas:
             is_int = np.issubdtype(dict(data.dtypes)[column], int)
         elif self.is_spark:
@@ -632,6 +653,7 @@ class Dataset:
                     self.interactions,
                     feature.column,
                     FeatureSource.INTERACTIONS,
+                    feature.feature_type,
                     feature.cardinality,
                 )
                 if self.item_features is not None:
@@ -639,6 +661,7 @@ class Dataset:
                         self.item_features,
                         feature.column,
                         FeatureSource.ITEM_FEATURES,
+                        feature.feature_type,
                         feature.cardinality,
                     )
             elif feature.feature_hint == FeatureHint.QUERY_ID:
@@ -646,6 +669,7 @@ class Dataset:
                     self.interactions,
                     feature.column,
                     FeatureSource.INTERACTIONS,
+                    feature.feature_type,
                     feature.cardinality,
                 )
                 if self.query_features is not None:
@@ -653,6 +677,7 @@ class Dataset:
                         self.query_features,
                         feature.column,
                         FeatureSource.QUERY_FEATURES,
+                        feature.feature_type,
                         feature.cardinality,
                     )
             else:
@@ -661,6 +686,7 @@ class Dataset:
                     data,
                     feature.column,
                     feature.feature_source,
+                    feature.feature_type,
                     feature.cardinality,
                 )

replay/data/dataset_utils/dataset_label_encoder.py CHANGED Viewed

@@ -8,8 +8,8 @@ Contains classes for encoding categorical data
 import warnings
 from typing import Dict, Iterable, Iterator, Optional, Sequence, Set, Union
-from replay.data import Dataset, FeatureHint, FeatureSchema, FeatureSource
-from replay.preprocessing import LabelEncoder, LabelEncodingRule
+from replay.data import Dataset, FeatureHint, FeatureSchema, FeatureSource, FeatureType
+from replay.preprocessing import LabelEncoder, LabelEncodingRule, SequenceEncodingRule
 from replay.preprocessing.label_encoder import HandleUnknownStrategies
@@ -62,7 +62,10 @@ class DatasetLabelEncoder:
         self._fill_features_columns(dataset.feature_schema)
         for column, feature_info in dataset.feature_schema.categorical_features.items():
-            encoding_rule = LabelEncodingRule(
+            encoding_rule_class = (
+                SequenceEncodingRule if feature_info.feature_type == FeatureType.CATEGORICAL_LIST else LabelEncodingRule
+            )
+            encoding_rule = encoding_rule_class(
                 column, handle_unknown=self._handle_unknown_rule, default_value=self._default_value_rule
             )
             if feature_info.feature_hint == FeatureHint.QUERY_ID:

replay/data/nn/schema.py CHANGED Viewed

@@ -70,6 +70,8 @@ class TensorFeatureInfo:
     Information about a tensor feature.
     """
+    DEFAULT_EMBEDDING_DIM = 64
     def __init__(
         self,
         name: str,
@@ -78,6 +80,7 @@ class TensorFeatureInfo:
         feature_hint: Optional[FeatureHint] = None,
         feature_sources: Optional[List[TensorFeatureSource]] = None,
         cardinality: Optional[int] = None,
+        padding_value: int = 0,
         embedding_dim: Optional[int] = None,
         tensor_dim: Optional[int] = None,
     ) -> None:
@@ -94,6 +97,7 @@ class TensorFeatureInfo:
         :param cardinality: cardinality of categorical feature, required for ids columns,
             optional for others,
             default: ``None``.
+        :param padding_value: value to pad sequences to desired length
         :param embedding_dim: embedding dimensions of categorical feature,
             default: ``None``.
         :param tensor_dim: tensor dimensions of numerical feature,
@@ -103,24 +107,24 @@ class TensorFeatureInfo:
         self._feature_hint = feature_hint
         self._feature_sources = feature_sources
         self._is_seq = is_seq
+        self._padding_value = padding_value
         if not isinstance(feature_type, FeatureType):
             msg = "Unknown feature type"
             raise ValueError(msg)
         self._feature_type = feature_type
-        if feature_type == FeatureType.NUMERICAL and (cardinality or embedding_dim):
+        if feature_type in [FeatureType.NUMERICAL, FeatureType.NUMERICAL_LIST] and (cardinality or embedding_dim):
             msg = "Cardinality and embedding dimensions are needed only with categorical feature type."
             raise ValueError(msg)
         self._cardinality = cardinality
-        if feature_type == FeatureType.CATEGORICAL and tensor_dim:
+        if feature_type in [FeatureType.CATEGORICAL, FeatureType.CATEGORICAL_LIST] and tensor_dim:
             msg = "Tensor dimensions is needed only with numerical feature type."
             raise ValueError(msg)
-        if feature_type == FeatureType.CATEGORICAL:
-            default_embedding_dim = 64
-            self._embedding_dim = embedding_dim or default_embedding_dim
+        if feature_type in [FeatureType.CATEGORICAL, FeatureType.CATEGORICAL_LIST]:
+            self._embedding_dim = embedding_dim or self.DEFAULT_EMBEDDING_DIM
         else:
             self._tensor_dim = tensor_dim
@@ -176,7 +180,8 @@ class TensorFeatureInfo:
     @property
     def is_seq(self) -> bool:
         """
-        :returns: Flag that feature is sequential.
+        :returns: Flag that feature is sequential.\n
+        Sequential means that the value of the feature will be determined for each element of the user's sequence.
         """
         return self._is_seq
@@ -185,21 +190,35 @@ class TensorFeatureInfo:
         """
         :returns: Flag that feature is categorical.
         """
-        return self.feature_type == FeatureType.CATEGORICAL
+        return self.feature_type in [FeatureType.CATEGORICAL, FeatureType.CATEGORICAL_LIST]
     @property
     def is_num(self) -> bool:
         """
         :returns: Flag that feature is numerical.
         """
-        return self.feature_type == FeatureType.NUMERICAL
+        return self.feature_type in [FeatureType.NUMERICAL, FeatureType.NUMERICAL_LIST]
+    @property
+    def is_list(self) -> bool:
+        """
+        :returns: Flag that feature is numerical list or categorical list.
+        """
+        return self.feature_type in [FeatureType.CATEGORICAL_LIST, FeatureType.NUMERICAL_LIST]
+    @property
+    def padding_value(self) -> int:
+        """
+        :returns: value to pad sequences to desired length.
+        """
+        return self._padding_value
     @property
     def cardinality(self) -> Optional[int]:
         """
         :returns: Cardinality of the feature.
         """
-        if self.feature_type != FeatureType.CATEGORICAL:
+        if not self.is_cat:
             msg = f"Can not get cardinality because feature type of {self.name} column is not categorical."
             raise RuntimeError(msg)
         return self._cardinality
@@ -212,7 +231,7 @@ class TensorFeatureInfo:
         """
         :returns: Dimensions of the numerical feature.
         """
-        if self.feature_type != FeatureType.NUMERICAL:
+        if not self.is_num:
             msg = f"Can not get tensor dimensions because feature type of {self.name} feature is not numerical."
             raise RuntimeError(msg)
         return self._tensor_dim
@@ -225,7 +244,7 @@ class TensorFeatureInfo:
         """
         :returns: Embedding dimensions of the feature.
         """
-        if self.feature_type != FeatureType.CATEGORICAL:
+        if not self.is_cat:
             msg = f"Can not get embedding dimensions because feature type of {self.name} feature is not categorical."
             raise RuntimeError(msg)
         return self._embedding_dim
@@ -317,14 +336,16 @@ class TensorSchema(Mapping[str, TensorFeatureInfo]):
         """
         :returns: Sequence of categorical features in a schema.
         """
-        return self.filter(feature_type=FeatureType.CATEGORICAL)
+        return self.filter(feature_type=FeatureType.CATEGORICAL) + self.filter(
+            feature_type=FeatureType.CATEGORICAL_LIST
+        )
     @property
     def numerical_features(self) -> "TensorSchema":
         """
         :returns: Sequence of numerical features in a schema.
         """
-        return self.filter(feature_type=FeatureType.NUMERICAL)
+        return self.filter(feature_type=FeatureType.NUMERICAL) + self.filter(feature_type=FeatureType.NUMERICAL_LIST)
     @property
     def query_id_features(self) -> "TensorSchema":
@@ -423,9 +444,9 @@ class TensorSchema(Mapping[str, TensorFeatureInfo]):
                     if feature.feature_sources
                     else None
                 ),
-                "cardinality": feature.cardinality if feature.feature_type == FeatureType.CATEGORICAL else None,
-                "embedding_dim": feature.embedding_dim if feature.feature_type == FeatureType.CATEGORICAL else None,
-                "tensor_dim": feature.tensor_dim if feature.feature_type == FeatureType.NUMERICAL else None,
+                "cardinality": feature.cardinality if feature.is_cat else None,
+                "embedding_dim": feature.embedding_dim if feature.is_cat else None,
+                "tensor_dim": feature.tensor_dim if feature.is_num else None,
             }
             for feature in self.all_features
         ]

replay-rec 0.18.0rc0__py3-none-any.whl → 0.18.1__py3-none-any.whl

replay-rec 0.18.0rc0__py3-none-any.whl → 0.18.1__py3-none-any.whl