replay-rec 0.21.0rc0__tar.gz → 0.21.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/PKG-INFO +17 -11
  2. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/pyproject.toml +19 -12
  3. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/__init__.py +1 -1
  4. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/parquet_module.py +1 -1
  5. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/torch_metrics_builder.py +1 -1
  6. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/callbacks/validation_callback.py +14 -4
  7. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/lightning/callback/metrics_callback.py +18 -9
  8. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/lightning/callback/predictions_callback.py +2 -2
  9. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/loss/base.py +3 -3
  10. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/loss/login_ce.py +1 -1
  11. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/sequential/sasrec/model.py +1 -1
  12. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/sequential/twotower/reader.py +14 -5
  13. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/template/sasrec.py +3 -3
  14. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/template/twotower.py +1 -1
  15. replay_rec-0.21.0rc0/replay/experimental/metrics/__init__.py +0 -62
  16. replay_rec-0.21.0rc0/replay/experimental/metrics/base_metric.py +0 -603
  17. replay_rec-0.21.0rc0/replay/experimental/metrics/coverage.py +0 -97
  18. replay_rec-0.21.0rc0/replay/experimental/metrics/experiment.py +0 -175
  19. replay_rec-0.21.0rc0/replay/experimental/metrics/hitrate.py +0 -26
  20. replay_rec-0.21.0rc0/replay/experimental/metrics/map.py +0 -30
  21. replay_rec-0.21.0rc0/replay/experimental/metrics/mrr.py +0 -18
  22. replay_rec-0.21.0rc0/replay/experimental/metrics/ncis_precision.py +0 -31
  23. replay_rec-0.21.0rc0/replay/experimental/metrics/ndcg.py +0 -49
  24. replay_rec-0.21.0rc0/replay/experimental/metrics/precision.py +0 -22
  25. replay_rec-0.21.0rc0/replay/experimental/metrics/recall.py +0 -25
  26. replay_rec-0.21.0rc0/replay/experimental/metrics/rocauc.py +0 -49
  27. replay_rec-0.21.0rc0/replay/experimental/metrics/surprisal.py +0 -90
  28. replay_rec-0.21.0rc0/replay/experimental/metrics/unexpectedness.py +0 -76
  29. replay_rec-0.21.0rc0/replay/experimental/models/__init__.py +0 -50
  30. replay_rec-0.21.0rc0/replay/experimental/models/admm_slim.py +0 -257
  31. replay_rec-0.21.0rc0/replay/experimental/models/base_neighbour_rec.py +0 -200
  32. replay_rec-0.21.0rc0/replay/experimental/models/base_rec.py +0 -1386
  33. replay_rec-0.21.0rc0/replay/experimental/models/base_torch_rec.py +0 -234
  34. replay_rec-0.21.0rc0/replay/experimental/models/cql.py +0 -454
  35. replay_rec-0.21.0rc0/replay/experimental/models/ddpg.py +0 -932
  36. replay_rec-0.21.0rc0/replay/experimental/models/dt4rec/dt4rec.py +0 -189
  37. replay_rec-0.21.0rc0/replay/experimental/models/dt4rec/gpt1.py +0 -401
  38. replay_rec-0.21.0rc0/replay/experimental/models/dt4rec/trainer.py +0 -127
  39. replay_rec-0.21.0rc0/replay/experimental/models/dt4rec/utils.py +0 -264
  40. replay_rec-0.21.0rc0/replay/experimental/models/extensions/spark_custom_models/als_extension.py +0 -792
  41. replay_rec-0.21.0rc0/replay/experimental/models/hierarchical_recommender.py +0 -331
  42. replay_rec-0.21.0rc0/replay/experimental/models/implicit_wrap.py +0 -131
  43. replay_rec-0.21.0rc0/replay/experimental/models/lightfm_wrap.py +0 -303
  44. replay_rec-0.21.0rc0/replay/experimental/models/mult_vae.py +0 -332
  45. replay_rec-0.21.0rc0/replay/experimental/models/neural_ts.py +0 -986
  46. replay_rec-0.21.0rc0/replay/experimental/models/neuromf.py +0 -406
  47. replay_rec-0.21.0rc0/replay/experimental/models/scala_als.py +0 -293
  48. replay_rec-0.21.0rc0/replay/experimental/models/u_lin_ucb.py +0 -115
  49. replay_rec-0.21.0rc0/replay/experimental/nn/data/__init__.py +0 -1
  50. replay_rec-0.21.0rc0/replay/experimental/nn/data/schema_builder.py +0 -102
  51. replay_rec-0.21.0rc0/replay/experimental/preprocessing/__init__.py +0 -3
  52. replay_rec-0.21.0rc0/replay/experimental/preprocessing/data_preparator.py +0 -839
  53. replay_rec-0.21.0rc0/replay/experimental/preprocessing/padder.py +0 -229
  54. replay_rec-0.21.0rc0/replay/experimental/preprocessing/sequence_generator.py +0 -208
  55. replay_rec-0.21.0rc0/replay/experimental/scenarios/__init__.py +0 -1
  56. replay_rec-0.21.0rc0/replay/experimental/scenarios/obp_wrapper/__init__.py +0 -8
  57. replay_rec-0.21.0rc0/replay/experimental/scenarios/obp_wrapper/obp_optuna_objective.py +0 -74
  58. replay_rec-0.21.0rc0/replay/experimental/scenarios/obp_wrapper/replay_offline.py +0 -261
  59. replay_rec-0.21.0rc0/replay/experimental/scenarios/obp_wrapper/utils.py +0 -85
  60. replay_rec-0.21.0rc0/replay/experimental/scenarios/two_stages/reranker.py +0 -117
  61. replay_rec-0.21.0rc0/replay/experimental/scenarios/two_stages/two_stages_scenario.py +0 -757
  62. replay_rec-0.21.0rc0/replay/experimental/utils/logger.py +0 -24
  63. replay_rec-0.21.0rc0/replay/experimental/utils/model_handler.py +0 -186
  64. replay_rec-0.21.0rc0/replay/experimental/utils/session_handler.py +0 -44
  65. replay_rec-0.21.0rc0/replay/models/extensions/ann/__init__.py +0 -0
  66. replay_rec-0.21.0rc0/replay/models/extensions/ann/entities/__init__.py +0 -0
  67. replay_rec-0.21.0rc0/replay/models/extensions/ann/index_builders/__init__.py +0 -0
  68. replay_rec-0.21.0rc0/replay/models/extensions/ann/index_inferers/__init__.py +0 -0
  69. replay_rec-0.21.0rc0/replay/models/extensions/ann/index_stores/__init__.py +0 -0
  70. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/LICENSE +0 -0
  71. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/NOTICE +0 -0
  72. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/README.md +0 -0
  73. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/__init__.py +0 -0
  74. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/dataset.py +0 -0
  75. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/dataset_utils/__init__.py +0 -0
  76. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/dataset_utils/dataset_label_encoder.py +0 -0
  77. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/__init__.py +0 -0
  78. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/__init__.py +0 -0
  79. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/collate.py +0 -0
  80. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/constants/__init__.py +0 -0
  81. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/constants/batches.py +0 -0
  82. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/constants/device.py +0 -0
  83. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/constants/filesystem.py +0 -0
  84. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/constants/metadata.py +0 -0
  85. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/fixed_batch_dataset.py +0 -0
  86. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/impl/__init__.py +0 -0
  87. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/impl/array_1d_column.py +0 -0
  88. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/impl/array_2d_column.py +0 -0
  89. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/impl/column_protocol.py +0 -0
  90. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/impl/indexing.py +0 -0
  91. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/impl/masking.py +0 -0
  92. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/impl/named_columns.py +0 -0
  93. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/impl/numeric_column.py +0 -0
  94. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/impl/utils.py +0 -0
  95. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/info/__init__.py +0 -0
  96. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/info/distributed_info.py +0 -0
  97. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/info/partitioning.py +0 -0
  98. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/info/replicas.py +0 -0
  99. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/info/worker_info.py +0 -0
  100. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/iterable_dataset.py +0 -0
  101. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/iterator.py +0 -0
  102. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/metadata/__init__.py +0 -0
  103. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/metadata/metadata.py +0 -0
  104. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/parquet_dataset.py +0 -0
  105. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/partitioned_iterable_dataset.py +0 -0
  106. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/utils/__init__.py +0 -0
  107. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/utils/compute_length.py +0 -0
  108. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/schema.py +0 -0
  109. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/sequence_tokenizer.py +0 -0
  110. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/sequential_dataset.py +0 -0
  111. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/torch_sequential_dataset.py +0 -0
  112. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/utils.py +0 -0
  113. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/schema.py +0 -0
  114. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/spark_schema.py +0 -0
  115. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/utils/__init__.py +0 -0
  116. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/utils/batching.py +0 -0
  117. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/utils/typing/__init__.py +0 -0
  118. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/utils/typing/dtype.py +0 -0
  119. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/__init__.py +0 -0
  120. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/base_metric.py +0 -0
  121. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/categorical_diversity.py +0 -0
  122. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/coverage.py +0 -0
  123. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/descriptors.py +0 -0
  124. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/experiment.py +0 -0
  125. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/hitrate.py +0 -0
  126. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/map.py +0 -0
  127. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/mrr.py +0 -0
  128. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/ndcg.py +0 -0
  129. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/novelty.py +0 -0
  130. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/offline_metrics.py +0 -0
  131. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/precision.py +0 -0
  132. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/recall.py +0 -0
  133. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/rocauc.py +0 -0
  134. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/surprisal.py +0 -0
  135. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/unexpectedness.py +0 -0
  136. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/__init__.py +0 -0
  137. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/als.py +0 -0
  138. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/association_rules.py +0 -0
  139. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/base_neighbour_rec.py +0 -0
  140. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/base_rec.py +0 -0
  141. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/cat_pop_rec.py +0 -0
  142. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/cluster.py +0 -0
  143. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/common.py +0 -0
  144. {replay_rec-0.21.0rc0/replay/experimental → replay_rec-0.21.1/replay/models/extensions}/__init__.py +0 -0
  145. {replay_rec-0.21.0rc0/replay/experimental/models/dt4rec → replay_rec-0.21.1/replay/models/extensions/ann}/__init__.py +0 -0
  146. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/ann_mixin.py +0 -0
  147. {replay_rec-0.21.0rc0/replay/experimental/models/extensions/spark_custom_models → replay_rec-0.21.1/replay/models/extensions/ann/entities}/__init__.py +0 -0
  148. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/entities/base_hnsw_param.py +0 -0
  149. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/entities/hnswlib_param.py +0 -0
  150. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/entities/nmslib_hnsw_param.py +0 -0
  151. {replay_rec-0.21.0rc0/replay/experimental/scenarios/two_stages → replay_rec-0.21.1/replay/models/extensions/ann/index_builders}/__init__.py +0 -0
  152. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_builders/base_index_builder.py +0 -0
  153. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_builders/driver_hnswlib_index_builder.py +0 -0
  154. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_builders/driver_nmslib_index_builder.py +0 -0
  155. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_builders/executor_hnswlib_index_builder.py +0 -0
  156. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_builders/executor_nmslib_index_builder.py +0 -0
  157. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_builders/nmslib_index_builder_mixin.py +0 -0
  158. {replay_rec-0.21.0rc0/replay/experimental/utils → replay_rec-0.21.1/replay/models/extensions/ann/index_inferers}/__init__.py +0 -0
  159. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_inferers/base_inferer.py +0 -0
  160. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_inferers/hnswlib_filter_index_inferer.py +0 -0
  161. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_inferers/hnswlib_index_inferer.py +0 -0
  162. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py +0 -0
  163. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py +0 -0
  164. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_inferers/utils.py +0 -0
  165. {replay_rec-0.21.0rc0/replay/models/extensions → replay_rec-0.21.1/replay/models/extensions/ann/index_stores}/__init__.py +0 -0
  166. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_stores/base_index_store.py +0 -0
  167. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_stores/hdfs_index_store.py +0 -0
  168. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_stores/shared_disk_index_store.py +0 -0
  169. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_stores/spark_files_index_store.py +0 -0
  170. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/index_stores/utils.py +0 -0
  171. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/extensions/ann/utils.py +0 -0
  172. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/kl_ucb.py +0 -0
  173. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/knn.py +0 -0
  174. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/lin_ucb.py +0 -0
  175. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/__init__.py +0 -0
  176. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/loss/__init__.py +0 -0
  177. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/loss/sce.py +0 -0
  178. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/optimizer_utils/__init__.py +0 -0
  179. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/optimizer_utils/optimizer_factory.py +0 -0
  180. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/__init__.py +0 -0
  181. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/bert4rec/__init__.py +0 -0
  182. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/bert4rec/dataset.py +0 -0
  183. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/bert4rec/lightning.py +0 -0
  184. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/bert4rec/model.py +0 -0
  185. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/callbacks/__init__.py +0 -0
  186. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/callbacks/prediction_callbacks.py +0 -0
  187. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/compiled/__init__.py +0 -0
  188. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/compiled/base_compiled_model.py +0 -0
  189. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/compiled/bert4rec_compiled.py +0 -0
  190. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/compiled/sasrec_compiled.py +0 -0
  191. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/postprocessors/__init__.py +0 -0
  192. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/postprocessors/_base.py +0 -0
  193. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/postprocessors/postprocessors.py +0 -0
  194. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/sasrec/__init__.py +0 -0
  195. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/sasrec/dataset.py +0 -0
  196. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/sasrec/lightning.py +0 -0
  197. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/sasrec/model.py +0 -0
  198. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/optimization/__init__.py +0 -0
  199. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/optimization/optuna_mixin.py +0 -0
  200. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/optimization/optuna_objective.py +0 -0
  201. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/pop_rec.py +0 -0
  202. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/query_pop_rec.py +0 -0
  203. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/random_rec.py +0 -0
  204. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/slim.py +0 -0
  205. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/thompson_sampling.py +0 -0
  206. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/ucb.py +0 -0
  207. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/wilson.py +0 -0
  208. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/word2vec.py +0 -0
  209. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/__init__.py +0 -0
  210. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/agg.py +0 -0
  211. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/attention.py +0 -0
  212. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/embedding.py +0 -0
  213. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/ffn.py +0 -0
  214. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/head.py +0 -0
  215. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/lightning/__init__.py +0 -0
  216. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/lightning/callback/__init__.py +0 -0
  217. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/lightning/module.py +0 -0
  218. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/lightning/optimizer.py +0 -0
  219. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/lightning/postprocessor/__init__.py +0 -0
  220. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/lightning/postprocessor/_base.py +0 -0
  221. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/lightning/postprocessor/seen_items.py +0 -0
  222. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/lightning/scheduler.py +0 -0
  223. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/loss/__init__.py +0 -0
  224. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/loss/bce.py +0 -0
  225. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/loss/ce.py +0 -0
  226. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/loss/logout_ce.py +0 -0
  227. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/mask.py +0 -0
  228. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/normalization.py +0 -0
  229. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/output.py +0 -0
  230. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/sequential/__init__.py +0 -0
  231. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/sequential/sasrec/__init__.py +0 -0
  232. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/sequential/sasrec/agg.py +0 -0
  233. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/sequential/sasrec/diff_transformer.py +0 -0
  234. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/sequential/sasrec/transformer.py +0 -0
  235. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/sequential/twotower/__init__.py +0 -0
  236. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/sequential/twotower/model.py +0 -0
  237. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/__init__.py +0 -0
  238. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/copy.py +0 -0
  239. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/grouping.py +0 -0
  240. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/negative_sampling.py +0 -0
  241. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/next_token.py +0 -0
  242. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/rename.py +0 -0
  243. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/reshape.py +0 -0
  244. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/sequence_roll.py +0 -0
  245. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/template/__init__.py +0 -0
  246. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/token_mask.py +0 -0
  247. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/trim.py +0 -0
  248. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/utils.py +0 -0
  249. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/preprocessing/__init__.py +0 -0
  250. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/preprocessing/converter.py +0 -0
  251. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/preprocessing/discretizer.py +0 -0
  252. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/preprocessing/filters.py +0 -0
  253. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/preprocessing/history_based_fp.py +0 -0
  254. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/preprocessing/label_encoder.py +0 -0
  255. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/preprocessing/sessionizer.py +0 -0
  256. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/preprocessing/utils.py +0 -0
  257. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/scenarios/__init__.py +0 -0
  258. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/scenarios/fallback.py +0 -0
  259. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/splitters/__init__.py +0 -0
  260. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/splitters/base_splitter.py +0 -0
  261. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/splitters/cold_user_random_splitter.py +0 -0
  262. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/splitters/k_folds.py +0 -0
  263. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/splitters/last_n_splitter.py +0 -0
  264. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/splitters/new_users_splitter.py +0 -0
  265. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/splitters/random_next_n_splitter.py +0 -0
  266. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/splitters/random_splitter.py +0 -0
  267. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/splitters/ratio_splitter.py +0 -0
  268. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/splitters/time_splitter.py +0 -0
  269. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/splitters/two_stage_splitter.py +0 -0
  270. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/utils/__init__.py +0 -0
  271. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/utils/common.py +0 -0
  272. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/utils/dataframe_bucketizer.py +0 -0
  273. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/utils/distributions.py +0 -0
  274. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/utils/model_handler.py +0 -0
  275. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/utils/session_handler.py +0 -0
  276. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/utils/spark_utils.py +0 -0
  277. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/utils/time.py +0 -0
  278. {replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/utils/types.py +0 -0
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: replay-rec
- Version: 0.21.0rc0
+ Version: 0.21.1
  Summary: RecSys Library
  License-Expression: Apache-2.0
  License-File: LICENSE
@@ -14,23 +14,29 @@ Classifier: Intended Audience :: Developers
  Classifier: Intended Audience :: Science/Research
  Classifier: Natural Language :: English
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
- Requires-Dist: d3rlpy (>=2.8.1,<2.9)
- Requires-Dist: implicit (>=0.7.2,<0.8)
- Requires-Dist: lightautoml (>=0.4.1,<0.5)
- Requires-Dist: lightning (>=2.0.2,<=2.4.0)
- Requires-Dist: numba (>=0.50,<1)
+ Provides-Extra: spark
+ Provides-Extra: torch
+ Provides-Extra: torch-cpu
+ Requires-Dist: lightning (<2.6.0) ; extra == "torch" or extra == "torch-cpu"
+ Requires-Dist: lightning ; extra == "torch"
+ Requires-Dist: lightning ; extra == "torch-cpu"
  Requires-Dist: numpy (>=1.20.0,<2)
  Requires-Dist: pandas (>=1.3.5,<2.4.0)
  Requires-Dist: polars (<2.0)
- Requires-Dist: psutil (<=7.0.0)
+ Requires-Dist: psutil (<=7.0.0) ; extra == "spark"
+ Requires-Dist: psutil ; extra == "spark"
  Requires-Dist: pyarrow (<22.0)
- Requires-Dist: pyspark (>=3.0,<3.5)
- Requires-Dist: pytorch-optimizer (>=3.8.0,<4)
- Requires-Dist: sb-obp (>=0.5.10,<0.6)
+ Requires-Dist: pyspark (>=3.0,<3.5) ; extra == "spark"
+ Requires-Dist: pyspark ; extra == "spark"
+ Requires-Dist: pytorch-optimizer (>=3.8.0,<3.9.0) ; extra == "torch" or extra == "torch-cpu"
+ Requires-Dist: pytorch-optimizer ; extra == "torch"
+ Requires-Dist: pytorch-optimizer ; extra == "torch-cpu"
  Requires-Dist: scikit-learn (>=1.6.1,<1.7.0)
  Requires-Dist: scipy (>=1.8.1,<2.0.0)
  Requires-Dist: setuptools
- Requires-Dist: torch (>=1.8,<3.0.0)
+ Requires-Dist: torch (>=1.8,<3.0.0) ; extra == "torch" or extra == "torch-cpu"
+ Requires-Dist: torch ; extra == "torch"
+ Requires-Dist: torch ; extra == "torch-cpu"
  Requires-Dist: tqdm (>=4.67,<5)
  Project-URL: Homepage, https://sb-ai-lab.github.io/RePlay/
  Project-URL: Repository, https://github.com/sb-ai-lab/RePlay
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/pyproject.toml
@@ -65,19 +65,19 @@ dependencies = [
  "scikit-learn (>=1.6.1,<1.7.0)",
  "pyarrow (<22.0)",
  "tqdm (>=4.67,<5)",
- "torch (>=1.8,<3.0.0)",
- "lightning (>=2.0.2,<=2.4.0)",
- "pytorch-optimizer (>=3.8.0,<4)",
- "lightautoml (>=0.4.1,<0.5)",
- "numba (>=0.50,<1)",
- "sb-obp (>=0.5.10,<0.6)",
- "d3rlpy (>=2.8.1,<2.9)",
- "implicit (>=0.7.2,<0.8)",
- "pyspark (>=3.0,<3.5)",
- "psutil (<=7.0.0)",
+ "pyspark (>=3.0,<3.5); extra == 'spark'",
+ "psutil (<=7.0.0); extra == 'spark'",
+ "torch (>=1.8, <3.0.0); extra == 'torch' or extra == 'torch-cpu'",
+ "pytorch-optimizer (>=3.8.0,<3.9.0); extra == 'torch' or extra == 'torch-cpu'",
+ "lightning (<2.6.0); extra == 'torch' or extra == 'torch-cpu'",
  ]
  dynamic = ["dependencies"]
- version = "0.21.0.preview"
+ version = "0.21.1"
+
+ [project.optional-dependencies]
+ spark = ["pyspark", "psutil"]
+ torch = ["torch", "pytorch-optimizer", "lightning"]
+ torch-cpu = ["torch", "pytorch-optimizer", "lightning"]

  [project.urls]
  homepage = "https://sb-ai-lab.github.io/RePlay/"
@@ -91,6 +91,13 @@ target-version = ["py39", "py310", "py311", "py312"]
  packages = [{include = "replay"}]
  exclude = [
  "replay/conftest.py",
+ "replay/experimental",
+ ]
+
+ [tool.poetry.dependencies]
+ torch = [
+ {markers = "extra == 'torch-cpu' and extra !='torch'", source = "torch-cpu-mirror"},
+ {markers = "extra == 'torch' and extra !='torch-cpu'", source = "PyPI"},
  ]

  [[tool.poetry.source]]
@@ -100,7 +107,7 @@ priority = "explicit"

  [tool.poetry-dynamic-versioning]
  enable = false
- format-jinja = """0.21.0{{ env['PACKAGE_SUFFIX'] }}"""
+ format-jinja = """0.21.1{{ env['PACKAGE_SUFFIX'] }}"""
  vcs = "git"

  [tool.ruff]
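
The two files above move the previously unconditional heavy dependencies (pyspark, psutil, torch, pytorch-optimizer, lightning) behind three new extras, "spark", "torch", and "torch-cpu", so a plain pip install replay-rec now pulls only the core scientific stack, while e.g. pip install "replay-rec[spark]" restores Spark support; the new [tool.poetry.dependencies] markers additionally route the "torch-cpu" extra to a CPU-only mirror source (torch-cpu-mirror) while plain "torch" resolves from PyPI. A minimal sketch for verifying the split, assuming the 0.21.1 wheel is installed in the current environment:

    # Inspect the declared requirements and their environment markers;
    # importlib.metadata.requires() returns them as raw requirement strings.
    from importlib.metadata import requires

    for requirement in requires("replay-rec") or []:
        if "extra ==" in requirement:
            print(requirement)
    # expected to include, per the PKG-INFO hunk above:
    #   pyspark (>=3.0,<3.5) ; extra == "spark"
    #   torch (>=1.8,<3.0.0) ; extra == "torch" or extra == "torch-cpu"
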
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/__init__.py
@@ -4,4 +4,4 @@
  # functionality removed in Python 3.12 is used in downstream packages (like lightfm)
  import setuptools as _

- __version__ = "0.21.0.preview"
+ __version__ = "0.21.1"
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/data/nn/parquet/parquet_module.py
@@ -94,7 +94,7 @@ class ParquetModule(L.LightningDataModule):
  missing_splits = [split_name for split_name, split_path in self.datapaths.items() if split_path is None]
  if missing_splits:
  msg = (
- f"The following dataset paths aren't provided: {','.join(missing_splits)}."
+ f"The following dataset paths aren't provided: {','.join(missing_splits)}. "
  "Make sure to disable these stages in your Lightning Trainer configuration."
  )
  warnings.warn(msg, stacklevel=2)
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/metrics/torch_metrics_builder.py
@@ -400,7 +400,7 @@ def metrics_to_df(metrics: Mapping[str, float]) -> PandasDataFrame:

  metric_name_and_k = metrics_df["m"].str.split("@", expand=True)
  metrics_df["metric"] = metric_name_and_k[0]
- metrics_df["k"] = metric_name_and_k[1]
+ metrics_df["k"] = metric_name_and_k[1].astype(int)

  pivoted_metrics = metrics_df.pivot(index="metric", columns="k", values="v")
  pivoted_metrics.index.name = None
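
The .astype(int) above fixes column ordering in the pivoted metrics table: the "@k" suffix comes out of str.split as a string, and pandas sorts string column labels lexicographically, so k=10 would land before k=2. A standalone reproduction of the pandas behavior (not the library code itself):

    import pandas as pd

    # With string keys the pivoted columns sort lexicographically ("10" < "2").
    metrics = {"ndcg@2": 0.50, "ndcg@10": 0.30}
    metrics_df = pd.DataFrame(list(metrics.items()), columns=["m", "v"])
    metric_name_and_k = metrics_df["m"].str.split("@", expand=True)
    metrics_df["metric"] = metric_name_and_k[0]

    metrics_df["k"] = metric_name_and_k[1]
    print(metrics_df.pivot(index="metric", columns="k", values="v").columns.tolist())  # ['10', '2']

    metrics_df["k"] = metric_name_and_k[1].astype(int)
    print(metrics_df.pivot(index="metric", columns="k", values="v").columns.tolist())  # [2, 10]
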
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/models/nn/sequential/callbacks/validation_callback.py
@@ -162,14 +162,24 @@ class ValidationMetricsCallback(lightning.Callback):
  @rank_zero_only
  def print_metrics() -> None:
  metrics = {}
+
  for name, value in trainer.logged_metrics.items():
  if "@" in name:
  metrics[name] = value.item()

- if metrics:
- metrics_df = metrics_to_df(metrics)
+ if not metrics:
+ return

- print(metrics_df) # noqa: T201
- print() # noqa: T201
+ if len(self._dataloaders_size) > 1:
+ for i in range(len(self._dataloaders_size)):
+ suffix = trainer._results.DATALOADER_SUFFIX.format(i)[1:]
+ cur_dataloader_metrics = {k.split("/")[0]: v for k, v in metrics.items() if suffix in k}
+ metrics_df = metrics_to_df(cur_dataloader_metrics)
+
+ print(suffix) # noqa: T201
+ print(metrics_df, "\n") # noqa: T201
+ else:
+ metrics_df = metrics_to_df(metrics)
+ print(metrics_df, "\n") # noqa: T201

  print_metrics()
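
When validation runs with several dataloaders, Lightning suffixes each logged metric name with its dataloader index (the template lives in the private trainer._results.DATALOADER_SUFFIX attribute, "/dataloader_idx_{}" in recent Lightning releases), and the new branch above regroups metrics by that suffix so each dataloader gets its own table; the identical branch appears again in metrics_callback.py below. A self-contained sketch of the regrouping, with hard-coded names standing in for trainer.logged_metrics:

    # Names mimic what Lightning logs with two validation dataloaders.
    logged_metrics = {
        "ndcg@10/dataloader_idx_0": 0.31,
        "recall@10/dataloader_idx_0": 0.44,
        "ndcg@10/dataloader_idx_1": 0.27,
        "recall@10/dataloader_idx_1": 0.40,
    }
    for i in range(2):
        suffix = "/dataloader_idx_{}".format(i)[1:]  # mirrors DATALOADER_SUFFIX.format(i)[1:]
        per_loader = {k.split("/")[0]: v for k, v in logged_metrics.items() if suffix in k}
        print(suffix, per_loader)
    # dataloader_idx_0 {'ndcg@10': 0.31, 'recall@10': 0.44}
    # dataloader_idx_1 {'ndcg@10': 0.27, 'recall@10': 0.40}
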
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/lightning/callback/metrics_callback.py
@@ -2,7 +2,6 @@ from typing import Any, Optional

  import lightning
  import torch
- from lightning.pytorch.utilities.combined_loader import CombinedLoader
  from lightning.pytorch.utilities.rank_zero import rank_zero_only

  from replay.metrics.torch_metrics_builder import (
@@ -64,8 +63,8 @@ class ComputeMetricsCallback(lightning.Callback):
  self._train_column = train_column

  def _get_dataloaders_size(self, dataloaders: Optional[Any]) -> list[int]:
- if isinstance(dataloaders, CombinedLoader):
- return [len(dataloader) for dataloader in dataloaders.flattened] # pragma: no cover
+ if isinstance(dataloaders, list):
+ return [len(dataloader) for dataloader in dataloaders]
  return [len(dataloaders)]

  def on_validation_epoch_start(
@@ -123,7 +122,7 @@ class ComputeMetricsCallback(lightning.Callback):
  batch: dict,
  batch_idx: int,
  dataloader_idx: int = 0,
- ) -> None: # pragma: no cover
+ ) -> None:
  self._batch_end(
  trainer,
  pl_module,
@@ -159,7 +158,7 @@ class ComputeMetricsCallback(lightning.Callback):
  def on_validation_epoch_end(self, trainer: lightning.Trainer, pl_module: LightningModule) -> None:
  self._epoch_end(trainer, pl_module)

- def on_test_epoch_end(self, trainer: lightning.Trainer, pl_module: LightningModule) -> None: # pragma: no cover
+ def on_test_epoch_end(self, trainer: lightning.Trainer, pl_module: LightningModule) -> None:
  self._epoch_end(trainer, pl_module)

  def _epoch_end(
@@ -170,14 +169,24 @@ class ComputeMetricsCallback(lightning.Callback):
  @rank_zero_only
  def print_metrics() -> None:
  metrics = {}
+
  for name, value in trainer.logged_metrics.items():
  if "@" in name:
  metrics[name] = value.item()

- if metrics:
- metrics_df = metrics_to_df(metrics)
+ if not metrics:
+ return

- print(metrics_df) # noqa: T201
- print() # noqa: T201
+ if len(self._dataloaders_size) > 1:
+ for i in range(len(self._dataloaders_size)):
+ suffix = trainer._results.DATALOADER_SUFFIX.format(i)[1:]
+ cur_dataloader_metrics = {k.split("/")[0]: v for k, v in metrics.items() if suffix in k}
+ metrics_df = metrics_to_df(cur_dataloader_metrics)
+
+ print(suffix) # noqa: T201
+ print(metrics_df, "\n") # noqa: T201
+ else:
+ metrics_df = metrics_to_df(metrics)
+ print(metrics_df, "\n") # noqa: T201

  print_metrics()
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/lightning/callback/predictions_callback.py
@@ -15,11 +15,11 @@ from replay.utils import (
  SparkDataFrame,
  )

- if PYSPARK_AVAILABLE: # pragma: no cover
+ if PYSPARK_AVAILABLE:
  import pyspark.sql.functions as sf
  from pyspark.sql import SparkSession
  from pyspark.sql.types import ArrayType, DoubleType, IntegerType, StructType
- else: # pragma: no cover
+ else:
  SparkSession = MissingImport

{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/loss/base.py
@@ -85,7 +85,7 @@ class SampledLossBase(torch.nn.Module):
  # [batch_size, num_negatives] -> [batch_size, 1, num_negatives]
  negative_labels = negative_labels.unsqueeze(1).repeat(1, seq_len, 1)

- if negative_labels.dim() == 3: # pragma: no cover
+ if negative_labels.dim() == 3:
  # [batch_size, seq_len, num_negatives] -> [batch_size, seq_len, 1, num_negatives]
  negative_labels = negative_labels.unsqueeze(-2)
  if num_positives != 1:
@@ -119,7 +119,7 @@ class SampledLossBase(torch.nn.Module):
  positive_labels = positive_labels[target_padding_mask].unsqueeze(-1)
  assert positive_labels.size() == (masked_batch_size, 1)

- if negative_labels.dim() != 1: # pragma: no cover
+ if negative_labels.dim() != 1:
  # [batch_size, seq_len, num_positives, num_negatives] -> [masked_batch_size, num_negatives]
  negative_labels = negative_labels[target_padding_mask]
  assert negative_labels.size() == (masked_batch_size, num_negatives)
@@ -183,7 +183,7 @@ def mask_negative_logits(
  if negative_labels_ignore_index >= 0:
  negative_logits.masked_fill_(negative_labels == negative_labels_ignore_index, -1e9)

- if negative_labels.dim() > 1: # pragma: no cover
+ if negative_labels.dim() > 1:
  # [masked_batch_size, num_negatives] -> [masked_batch_size, 1, num_negatives]
  negative_labels = negative_labels.unsqueeze(-2)

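
The pragma-no-cover removals above mean these reshaping branches are now exercised by tests; the bracketed comments in the diff document the intended shapes. A dummy-valued walk-through of the same transforms:

    import torch

    # Shape walk-through for the negative-label handling in SampledLossBase.
    batch_size, seq_len, num_negatives = 2, 3, 4
    negative_labels = torch.randint(0, 100, (batch_size, num_negatives))

    # [batch_size, num_negatives] -> [batch_size, seq_len, num_negatives]
    negative_labels = negative_labels.unsqueeze(1).repeat(1, seq_len, 1)
    assert negative_labels.dim() == 3

    # [batch_size, seq_len, num_negatives] -> [batch_size, seq_len, 1, num_negatives]
    negative_labels = negative_labels.unsqueeze(-2)
    assert negative_labels.shape == (batch_size, seq_len, 1, num_negatives)
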
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/loss/login_ce.py
@@ -74,7 +74,7 @@ class LogInCEBase(SampledLossBase):
  positive_labels = positive_labels[masked_target_padding_mask]
  assert positive_labels.size() == (masked_batch_size, num_positives)

- if negative_labels.dim() > 1: # pragma: no cover
+ if negative_labels.dim() > 1:
  # [batch_size, seq_len, num_negatives] -> [masked_batch_size, num_negatives]
  negative_labels = negative_labels[masked_target_padding_mask]
  assert negative_labels.size() == (masked_batch_size, num_negatives)
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/sequential/sasrec/model.py
@@ -141,7 +141,7 @@ class SasRec(torch.nn.Module):
  feature_type=FeatureType.CATEGORICAL,
  embedding_dim=256,
  padding_value=NUM_UNIQUE_ITEMS,
- cardinality=NUM_UNIQUE_ITEMS+1,
+ cardinality=NUM_UNIQUE_ITEMS,
  feature_hint=FeatureHint.ITEM_ID,
  feature_sources=[TensorFeatureSource(FeatureSource.INTERACTIONS, "item_id")]
  ),
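
The docstring fix drops the "+1": under the convention this example now follows, cardinality counts only the real items while padding_value occupies the next free index, which implies the embedding table itself reserves an extra row for padding (an assumption about the library's internals, not stated in the diff). The equivalent raw-torch layout would be:

    import torch

    # Assumed convention: cardinality covers real items only, and the padding
    # token sits at index NUM_UNIQUE_ITEMS, so a raw embedding table needs
    # cardinality + 1 rows.
    NUM_UNIQUE_ITEMS = 1000
    embedding = torch.nn.Embedding(
        num_embeddings=NUM_UNIQUE_ITEMS + 1,
        embedding_dim=256,
        padding_idx=NUM_UNIQUE_ITEMS,
    )
    print(embedding(torch.tensor([0, NUM_UNIQUE_ITEMS])).shape)  # torch.Size([2, 256])
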
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/sequential/twotower/reader.py
@@ -22,7 +22,6 @@ class FeaturesReader:
  :param schema: the same tensor schema used in TwoTower model.
  :param metadata: A dictionary of feature names that
  associated with its shape and padding_value.\n
- Example: {"item_id" : {"shape": 100, "padding": 7657}}.\n
  For details, see the section :ref:`parquet-processing`.
  :param path: path to parquet with dataframe of item features.\n
  **Note:**\n
@@ -30,8 +29,8 @@ class FeaturesReader:
  2. Every feature for item "tower" in `schema` must contain ``feature_sources`` with the names
  of the source features to create correct inverse mapping.
  Also, for each such feature one of the requirements must be met: the ``schema`` for the feature must
- contain ``feature_sources`` with a source of type FeatureSource.ITEM_FEATURES
- or hint type FeatureHint.ITEM_ID.
+ contain ``feature_sources`` with a source of type ``FeatureSource.ITEM_FEATURES``
+ or hint type ``FeatureHint.ITEM_ID``.

  """
  item_feature_names = [
@@ -81,8 +80,18 @@ class FeaturesReader:
  self._features = {}

  for k in features.columns:
- dtype = torch.float32 if schema[k].is_num else torch.int64
- feature_tensor = torch.asarray(features[k], dtype=dtype)
+ dtype = np.float32 if schema[k].is_num else np.int64
+ if schema[k].is_list:
+ feature = np.asarray(
+ features[k].to_list(),
+ dtype=dtype,
+ )
+ else:
+ feature = features[k].to_numpy(dtype=dtype)
+ feature_tensor = torch.asarray(
+ feature,
+ dtype=torch.float32 if schema[k].is_num else torch.int64,
+ )
  self._features[k] = feature_tensor

  def __getitem__(self, key: str) -> torch.Tensor:
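
The rewritten loop routes every pandas column through numpy before torch.asarray, because a Series of Python lists carries object dtype that torch cannot consume directly; list-valued features are materialized via to_list() into a rectangular 2-D array, scalar features via to_numpy(). A minimal standalone illustration (hypothetical feature names; list columns are assumed rectangular):

    import numpy as np
    import pandas as pd
    import torch

    # Two conversion paths: plain numeric column vs. list-valued column.
    features = pd.DataFrame({"item_id": [1, 2], "genre_ids": [[0, 3], [1, 4]]})

    scalar_col = torch.asarray(features["item_id"].to_numpy(dtype=np.int64), dtype=torch.int64)
    list_col = torch.asarray(np.asarray(features["genre_ids"].to_list(), dtype=np.int64), dtype=torch.int64)

    print(scalar_col.shape, list_col.shape)  # torch.Size([2]) torch.Size([2, 2])
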
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/template/sasrec.py
@@ -14,7 +14,7 @@ def make_default_sasrec_transforms(

  Generated pipeline expects input dataset to contain the following columns:
  1) Query ID column, specified by ``query_column``.
- 2) Item ID column, specified in the tensor schema.
+ 2) All features specified in the ``tensor_schema``.

  :param tensor_schema: TensorSchema used to infer feature columns.
  :param query_column: Name of the column containing query IDs. Default: ``"query_id"``.
@@ -32,12 +32,12 @@ def make_default_sasrec_transforms(
  ),
  UnsqueezeTransform("target_padding_mask", -1),
  UnsqueezeTransform("positive_labels", -1),
- GroupTransform({"feature_tensors": [item_column]}),
+ GroupTransform({"feature_tensors": tensor_schema.names}),
  ]

  val_transforms = [
  RenameTransform({query_column: "query_id", f"{item_column}_mask": "padding_mask"}),
- GroupTransform({"feature_tensors": [item_column]}),
+ GroupTransform({"feature_tensors": tensor_schema.names}),
  ]
  test_transforms = copy.deepcopy(val_transforms)

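
This change (and the matching one in twotower.py below) widens the grouping from the single item column to every feature named in the tensor schema, so batches carrying additional categorical or numerical features survive the default transform pipeline. A hypothetical dict-level illustration of the widened grouping (GroupTransform internals are not shown in the diff; the batch contents are invented):

    # Before the fix only "item_id" would be nested under "feature_tensors";
    # with tensor_schema.names every schema feature is.
    tensor_schema_names = ["item_id", "category_id"]  # assumed schema contents
    batch = {"item_id": [3, 5], "category_id": [1, 1], "padding_mask": [1, 1]}

    grouped = {"feature_tensors": {name: batch.pop(name) for name in tensor_schema_names}, **batch}
    print(grouped)
    # {'feature_tensors': {'item_id': [3, 5], 'category_id': [1, 1]}, 'padding_mask': [1, 1]}
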
{replay_rec-0.21.0rc0 → replay_rec-0.21.1}/replay/nn/transform/template/twotower.py
@@ -13,7 +13,7 @@ def make_default_twotower_transforms(

  Generated pipeline expects input dataset to contain the following columns:
  1) Query ID column, specified by ``query_column``.
- 2) Item ID column, specified in the tensor schema.
+ 2) All features specified in the ``tensor_schema``.

  :param tensor_schema: TensorSchema used to infer feature columns.
  :param query_column: Name of the column containing query IDs. Default: ``"query_id"``.
replay_rec-0.21.0rc0/replay/experimental/metrics/__init__.py
@@ -1,62 +0,0 @@
- """
- Most metrics require dataframe with recommendations
- and dataframe with ground truth values —
- which objects each user interacted with.
-
- - recommendations (Union[pandas.DataFrame, spark.DataFrame]):
- predictions of a recommender system,
- DataFrame with columns ``[user_id, item_id, relevance]``
- - ground_truth (Union[pandas.DataFrame, spark.DataFrame]):
- test data, DataFrame with columns
- ``[user_id, item_id, timestamp, relevance]``
-
- Metric is calculated for all users, presented in ``ground_truth``
- for accurate metric calculation in case when the recommender system generated
- recommendation not for all users. It is assumed, that all users,
- we want to calculate metric for, have positive interactions.
-
- But if we have users, who observed the recommendations, but have not responded,
- those users will be ignored and metric will be overestimated.
- For such case we propose additional optional parameter ``ground_truth_users``,
- the dataframe with all users, which should be considered during the metric calculation.
-
- - ground_truth_users (Optional[Union[pandas.DataFrame, spark.DataFrame]]):
- full list of users to calculate metric for, DataFrame with ``user_id`` column
-
- Every metric is calculated using top ``K`` items for each user.
- It is also possible to calculate metrics
- using multiple values for ``K`` simultaneously.
- In this case the result will be a dictionary and not a number.
-
- Make sure your recommendations do not contain user-item duplicates
- as duplicates could lead to the wrong calculation results.
-
- - k (Union[Iterable[int], int]):
- a single number or a list, specifying the
- truncation length for recommendation list for each user
-
- By default, metrics are averaged by users,
- but you can alternatively use method ``metric.median``.
- Also, you can get the lower bound
- of ``conf_interval`` for a given ``alpha``.
-
- Diversity metrics require extra parameters on initialization stage,
- but do not use ``ground_truth`` parameter.
-
- For each metric, a formula for its calculation is given, because this is
- important for the correct comparison of algorithms, as mentioned in our
- `article <https://arxiv.org/abs/2206.12858>`_.
- """
-
- from replay.experimental.metrics.base_metric import Metric, NCISMetric
- from replay.experimental.metrics.coverage import Coverage
- from replay.experimental.metrics.hitrate import HitRate
- from replay.experimental.metrics.map import MAP
- from replay.experimental.metrics.mrr import MRR
- from replay.experimental.metrics.ncis_precision import NCISPrecision
- from replay.experimental.metrics.ndcg import NDCG
- from replay.experimental.metrics.precision import Precision
- from replay.experimental.metrics.recall import Recall
- from replay.experimental.metrics.rocauc import RocAuc
- from replay.experimental.metrics.surprisal import Surprisal
- from replay.experimental.metrics.unexpectedness import Unexpectedness