replay-rec 0.17.1__tar.gz → 0.18.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {replay_rec-0.17.1 → replay_rec-0.18.0}/PKG-INFO +12 -10
- {replay_rec-0.17.1 → replay_rec-0.18.0}/pyproject.toml +17 -14
- replay_rec-0.18.0/replay/__init__.py +3 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/data/dataset.py +3 -2
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/data/dataset_utils/dataset_label_encoder.py +1 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/data/nn/schema.py +5 -5
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/__init__.py +1 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/als.py +1 -1
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/base_rec.py +7 -7
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py +3 -3
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py +3 -3
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/bert4rec/model.py +5 -112
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/sasrec/model.py +8 -5
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/optimization/optuna_objective.py +1 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/preprocessing/converter.py +1 -1
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/preprocessing/filters.py +19 -18
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/preprocessing/history_based_fp.py +5 -5
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/preprocessing/label_encoder.py +1 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/scenarios/__init__.py +1 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/splitters/last_n_splitter.py +1 -1
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/splitters/time_splitter.py +1 -1
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/splitters/two_stage_splitter.py +8 -6
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/utils/distributions.py +1 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/utils/session_handler.py +3 -3
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/utils/spark_utils.py +2 -2
- replay_rec-0.17.1/replay/__init__.py +0 -2
- {replay_rec-0.17.1 → replay_rec-0.18.0}/LICENSE +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/README.md +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/data/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/data/dataset_utils/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/data/nn/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/data/nn/sequence_tokenizer.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/data/nn/sequential_dataset.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/data/nn/torch_sequential_dataset.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/data/nn/utils.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/data/schema.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/data/spark_schema.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/base_metric.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/categorical_diversity.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/coverage.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/descriptors.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/experiment.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/hitrate.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/map.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/mrr.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/ndcg.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/novelty.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/offline_metrics.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/precision.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/recall.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/rocauc.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/surprisal.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/torch_metrics_builder.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/metrics/unexpectedness.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/association_rules.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/base_neighbour_rec.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/cat_pop_rec.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/cluster.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/ann_mixin.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/entities/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/entities/base_hnsw_param.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/entities/hnswlib_param.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/entities/nmslib_hnsw_param.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_builders/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_builders/base_index_builder.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_builders/driver_hnswlib_index_builder.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_builders/driver_nmslib_index_builder.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_builders/executor_hnswlib_index_builder.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_builders/executor_nmslib_index_builder.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_builders/nmslib_index_builder_mixin.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_inferers/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_inferers/base_inferer.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_inferers/hnswlib_filter_index_inferer.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_inferers/hnswlib_index_inferer.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_inferers/utils.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_stores/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_stores/base_index_store.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_stores/hdfs_index_store.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_stores/shared_disk_index_store.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_stores/spark_files_index_store.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/index_stores/utils.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/extensions/ann/utils.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/kl_ucb.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/knn.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/optimizer_utils/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/optimizer_utils/optimizer_factory.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/bert4rec/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/bert4rec/dataset.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/bert4rec/lightning.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/callbacks/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/callbacks/prediction_callbacks.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/callbacks/validation_callback.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/postprocessors/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/postprocessors/_base.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/postprocessors/postprocessors.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/sasrec/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/sasrec/dataset.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/nn/sequential/sasrec/lightning.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/pop_rec.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/query_pop_rec.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/random_rec.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/slim.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/thompson_sampling.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/ucb.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/wilson.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/models/word2vec.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/optimization/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/preprocessing/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/preprocessing/sessionizer.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/scenarios/fallback.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/splitters/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/splitters/base_splitter.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/splitters/cold_user_random_splitter.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/splitters/k_folds.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/splitters/new_users_splitter.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/splitters/random_splitter.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/splitters/ratio_splitter.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/utils/__init__.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/utils/common.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/utils/dataframe_bucketizer.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/utils/model_handler.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/utils/time.py +0 -0
- {replay_rec-0.17.1 → replay_rec-0.18.0}/replay/utils/types.py +0 -0
--- replay_rec-0.17.1/PKG-INFO
+++ replay_rec-0.18.0/PKG-INFO
@@ -1,11 +1,11 @@
 Metadata-Version: 2.1
 Name: replay-rec
-Version: 0.17.1
+Version: 0.18.0
 Summary: RecSys Library
 Home-page: https://sb-ai-lab.github.io/RePlay/
 License: Apache-2.0
 Author: AI Lab
-Requires-Python: >=3.8.1,<3.11
+Requires-Python: >=3.8.1,<3.12
 Classifier: Development Status :: 4 - Beta
 Classifier: Environment :: Console
 Classifier: Intended Audience :: Developers
@@ -16,24 +16,26 @@ Classifier: Operating System :: Unix
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Provides-Extra: all
 Provides-Extra: spark
 Provides-Extra: torch
-Requires-Dist: …
-Requires-Dist: …
-Requires-Dist: …
+Requires-Dist: fixed-install-nmslib (==2.1.2)
+Requires-Dist: hnswlib (>=0.7.0,<0.8.0)
+Requires-Dist: lightning (>=2.0.2,<=2.4.0) ; extra == "torch" or extra == "all"
 Requires-Dist: numpy (>=1.20.0)
 Requires-Dist: optuna (>=3.2.0,<3.3.0)
 Requires-Dist: pandas (>=1.3.5,<=2.2.2)
-Requires-Dist: polars (>=0.…
-Requires-Dist: psutil (>=…
+Requires-Dist: polars (>=1.0.0,<1.1.0)
+Requires-Dist: psutil (>=6.0.0,<6.1.0)
 Requires-Dist: pyarrow (>=12.0.1)
-Requires-Dist: pyspark (>=3.0,<3.5) ; …
+Requires-Dist: pyspark (>=3.0,<3.6) ; (python_full_version >= "3.8.1" and python_version < "3.11") and (extra == "spark" or extra == "all")
+Requires-Dist: pyspark (>=3.4,<3.6) ; (python_version >= "3.11" and python_version < "3.12") and (extra == "spark" or extra == "all")
 Requires-Dist: pytorch-ranger (>=0.1.1,<0.2.0) ; extra == "torch" or extra == "all"
 Requires-Dist: scikit-learn (>=1.0.2,<2.0.0)
-Requires-Dist: scipy (>=1.8.1,<…
-Requires-Dist: torch (>=1.8…
+Requires-Dist: scipy (>=1.8.1,<2.0.0)
+Requires-Dist: torch (>=1.8,<=2.4.0) ; extra == "torch" or extra == "all"
 Project-URL: Repository, https://github.com/sb-ai-lab/RePlay
 Description-Content-Type: text/markdown
 
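
The two pyspark requirements above are split by interpreter version. A minimal sketch of how their environment markers resolve, using the third-party packaging library (the simplified marker strings here are illustrative, not copied from the metadata):

    from packaging.markers import Marker

    # At most one of the two pyspark markers matches a given interpreter.
    env = {"python_version": "3.11", "python_full_version": "3.11.0"}
    legacy = Marker('python_full_version >= "3.8.1" and python_version < "3.11"')
    modern = Marker('python_version >= "3.11" and python_version < "3.12"')
    print(legacy.evaluate(env))  # False -> pyspark (>=3.0,<3.6) is skipped
    print(modern.evaluate(env))  # True  -> pyspark (>=3.4,<3.6) applies
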
--- replay_rec-0.17.1/pyproject.toml
+++ replay_rec-0.18.0/pyproject.toml
@@ -7,7 +7,7 @@ build-backend = "poetry_dynamic_versioning.backend"
 
 [tool.black]
 line-length = 120
-target-versions = ["py38", "py39", "py310"]
+target-versions = ["py38", "py39", "py310", "py311"]
 
 [tool.poetry]
 name = "replay-rec"
@@ -41,24 +41,27 @@ exclude = [
     "replay/conftest.py",
     "replay/experimental",
 ]
-version = "0.17.1"
+version = "0.18.0"
 
 [tool.poetry.dependencies]
-python = ">=3.8.1, <3.11"
+python = ">=3.8.1, <3.12"
 numpy = ">=1.20.0"
-pandas = ">=1.3.5…
-polars = "~0.…
+pandas = ">=1.3.5, <=2.2.2"
+polars = "~1.0.0"
 optuna = "~3.2.0"
-scipy = "…
-psutil = "~…
-pyspark = {version = ">=3.0,<3.5", optional = true}
+scipy = "^1.8.1"
+psutil = "~6.0.0"
 scikit-learn = "^1.0.2"
 pyarrow = ">=12.0.1"
-…
-…
+pyspark = [
+    {version = ">=3.4,<3.6", python = ">=3.11,<3.12", optional = true},
+    {version = ">=3.0,<3.6", python = ">=3.8.1,<3.11", optional = true},
+]
+torch = {version = ">=1.8, <=2.4.0", optional = true}
+lightning = {version = ">=2.0.2, <=2.4.0", optional = true}
 pytorch-ranger = {version = "^0.1.1", optional = true}
-nmslib = "2.1.…
-hnswlib = "0.7.0"
+fixed-install-nmslib = "2.1.2"
+hnswlib = "^0.7.0"
 
 [tool.poetry.extras]
 spark = ["pyspark"]
@@ -70,7 +73,7 @@ jupyter = "~1.0.0"
 jupyterlab = "^3.6.0"
 pytest = ">=7.1.0"
 pytest-cov = ">=3.0.0"
-statsmodels = "~0.…
+statsmodels = "~0.14.0"
 black = ">=23.3.0"
 ruff = ">=0.0.261"
 toml-sort = "^0.23.0"
@@ -85,7 +88,7 @@ data-science-types = "0.2.23"
 
 [tool.poetry-dynamic-versioning]
 enable = false
-format-jinja = """0.17.1{{ env['PACKAGE_SUFFIX'] }}"""
+format-jinja = """0.18.0{{ env['PACKAGE_SUFFIX'] }}"""
 vcs = "git"
 
 [tool.ruff]
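
Several pins move at once here (polars to the 1.x line, psutil to 6.x, new torch and lightning caps). A quick sanity check of what actually resolved in an installed environment, using only the standard library:

    from importlib.metadata import PackageNotFoundError, version

    # Distributions whose constraints changed between 0.17.1 and 0.18.0.
    for dist in ("polars", "psutil", "scipy", "pyspark", "torch", "lightning"):
        try:
            print(f"{dist}: {version(dist)}")
        except PackageNotFoundError:
            print(f"{dist}: not installed (optional extra not selected)")
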
--- replay_rec-0.17.1/replay/data/dataset.py
+++ replay_rec-0.18.0/replay/data/dataset.py
@@ -1,6 +1,7 @@
 """
 ``Dataset`` universal dataset class for manipulating interactions and feed data to models.
 """
+
 from __future__ import annotations
 
 import json
@@ -606,7 +607,7 @@ class Dataset:
         if self.is_pandas:
             min_id = data[column].min()
         elif self.is_spark:
-            min_id = data.agg(sf.min(column).alias("min_index")).…
+            min_id = data.agg(sf.min(column).alias("min_index")).first()[0]
         else:
             min_id = data[column].min()
         if min_id < 0:
@@ -616,7 +617,7 @@ class Dataset:
         if self.is_pandas:
            max_id = data[column].max()
         elif self.is_spark:
-            max_id = data.agg(sf.max(column).alias("max_index")).…
+            max_id = data.agg(sf.max(column).alias("max_index")).first()[0]
         else:
             max_id = data[column].max()
 
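
This release repeatedly swaps a (truncated in this rendering) driver-side idiom for DataFrame.first() wherever a single aggregate value is needed; the same change recurs in als.py and base_rec.py below. A standalone PySpark sketch of the pattern, with illustrative session setup and data:

    import pyspark.sql.functions as sf
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    data = spark.createDataFrame([(1,), (5,), (3,)], ["item_id"])

    # first() returns a single Row (None for an empty frame);
    # indexing it extracts the scalar without collecting the whole frame.
    min_id = data.agg(sf.min("item_id").alias("min_index")).first()[0]
    print(min_id)  # 1
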
--- replay_rec-0.17.1/replay/data/dataset_utils/dataset_label_encoder.py
+++ replay_rec-0.18.0/replay/data/dataset_utils/dataset_label_encoder.py
@@ -4,6 +4,7 @@ Contains classes for encoding categorical data
 ``LabelEncoderTransformWarning`` new category of warning for DatasetLabelEncoder.
 ``DatasetLabelEncoder`` to encode categorical features in `Dataset` objects.
 """
+
 import warnings
 from typing import Dict, Iterable, Iterator, Optional, Sequence, Set, Union
 
--- replay_rec-0.17.1/replay/data/nn/schema.py
+++ replay_rec-0.18.0/replay/data/nn/schema.py
@@ -418,11 +418,11 @@ class TensorSchema(Mapping[str, TensorFeatureInfo]):
             "feature_type": feature.feature_type.name,
             "is_seq": feature.is_seq,
             "feature_hint": feature.feature_hint.name if feature.feature_hint else None,
-            "feature_sources": …
-                {"source": x.source.name, "column": x.column, "index": x.index} for x in feature.feature_sources
-            …
-            …
-            …
+            "feature_sources": (
+                [{"source": x.source.name, "column": x.column, "index": x.index} for x in feature.feature_sources]
+                if feature.feature_sources
+                else None
+            ),
             "cardinality": feature.cardinality if feature.feature_type == FeatureType.CATEGORICAL else None,
             "embedding_dim": feature.embedding_dim if feature.feature_type == FeatureType.CATEGORICAL else None,
             "tensor_dim": feature.tensor_dim if feature.feature_type == FeatureType.NUMERICAL else None,
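
The serialized "feature_sources" entry now falls back to None when a feature has no sources instead of always building a list. A tiny sketch of the guarded comprehension in isolation (the x.* fields mirror the diff; the empty input is illustrative):

    feature_sources = []  # a feature with no attached sources

    serialized = (
        [{"source": x.source.name, "column": x.column, "index": x.index} for x in feature_sources]
        if feature_sources
        else None
    )
    print(serialized)  # None rather than []
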
--- replay_rec-0.17.1/replay/metrics/__init__.py
+++ replay_rec-0.18.0/replay/metrics/__init__.py
@@ -42,6 +42,7 @@ For each metric, a formula for its calculation is given, because this is
 important for the correct comparison of algorithms, as mentioned in our
 `article <https://arxiv.org/abs/2206.12858>`_.
 """
+
 from .base_metric import Metric
 from .categorical_diversity import CategoricalDiversity
 from .coverage import Coverage
--- replay_rec-0.17.1/replay/models/als.py
+++ replay_rec-0.18.0/replay/models/als.py
@@ -115,7 +115,7 @@ class ALSWrap(Recommender, ItemVectorModel):
             .groupBy(self.query_column)
             .agg(sf.count(self.query_column).alias("num_seen"))
             .select(sf.max("num_seen"))
-            .…
+            .first()[0]
         )
         max_seen = max_seen_in_interactions if max_seen_in_interactions is not None else 0
 
--- replay_rec-0.17.1/replay/models/base_rec.py
+++ replay_rec-0.18.0/replay/models/base_rec.py
@@ -401,8 +401,8 @@ class BaseRecommender(RecommenderCommons, IsSavable, ABC):
         self.fit_items = sf.broadcast(items)
         self._num_queries = self.fit_queries.count()
         self._num_items = self.fit_items.count()
-        self._query_dim_size = self.fit_queries.agg({self.query_column: "max"}).…
-        self._item_dim_size = self.fit_items.agg({self.item_column: "max"}).…
+        self._query_dim_size = self.fit_queries.agg({self.query_column: "max"}).first()[0] + 1
+        self._item_dim_size = self.fit_items.agg({self.item_column: "max"}).first()[0] + 1
         self._fit(dataset)
 
     @abstractmethod
@@ -431,7 +431,7 @@ class BaseRecommender(RecommenderCommons, IsSavable, ABC):
         # count maximal number of items seen by queries
         max_seen = 0
         if num_seen.count() > 0:
-            max_seen = num_seen.select(sf.max("seen_count")).…
+            max_seen = num_seen.select(sf.max("seen_count")).first()[0]
 
         # crop recommendations to first k + max_seen items for each query
         recs = recs.withColumn(
@@ -708,7 +708,7 @@ class BaseRecommender(RecommenderCommons, IsSavable, ABC):
         setattr(
             self,
             dim_size,
-            fit_entities.agg({column: "max"}).…
+            fit_entities.agg({column: "max"}).first()[0] + 1,
         )
         return getattr(self, dim_size)
 
@@ -1426,7 +1426,7 @@ class NonPersonalizedRecommender(Recommender, ABC):
         Calculating a fill value as the minimal rating
         calculated during model training multiplied by weight.
         """
-        return item_popularity.select(sf.min(rating_column)).…
+        return item_popularity.select(sf.min(rating_column)).first()[0] * weight
 
     @staticmethod
     def _check_rating(dataset: Dataset):
@@ -1460,7 +1460,7 @@ class NonPersonalizedRecommender(Recommender, ABC):
                 .agg(sf.countDistinct(item_column).alias("items_count"))
             )
             .select(sf.max("items_count"))
-            .…
+            .first()[0]
         )
         # all queries have empty history
         if max_hist_len is None:
@@ -1495,7 +1495,7 @@ class NonPersonalizedRecommender(Recommender, ABC):
         queries = queries.join(query_to_num_items, on=self.query_column, how="left")
         queries = queries.fillna(0, "num_items")
         # 'selected_item_popularity' truncation by k + max_seen
-        max_seen = queries.select(sf.coalesce(sf.max("num_items"), sf.lit(0))).…
+        max_seen = queries.select(sf.coalesce(sf.max("num_items"), sf.lit(0))).first()[0]
         selected_item_popularity = selected_item_popularity.filter(sf.col("rank") <= k + max_seen)
         return queries.join(selected_item_popularity, on=(sf.col("rank") <= k + sf.col("num_items")), how="left")
 
--- replay_rec-0.17.1/replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py
+++ replay_rec-0.18.0/replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py
@@ -32,9 +32,9 @@ class NmslibFilterIndexInferer(IndexInferer):
         index = index_store.load_index(
             init_index=lambda: create_nmslib_index_instance(index_params),
             load_index=lambda index, path: index.loadIndex(path, load_data=True),
-            configure_index=lambda index: …
-            …
-            …
+            configure_index=lambda index: (
+                index.setQueryTimeParams({"efSearch": index_params.ef_s}) if index_params.ef_s else None
+            ),
         )
 
         # max number of items to retrieve per batch
--- replay_rec-0.17.1/replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py
+++ replay_rec-0.18.0/replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py
@@ -30,9 +30,9 @@ class NmslibIndexInferer(IndexInferer):
         index = index_store.load_index(
             init_index=lambda: create_nmslib_index_instance(index_params),
             load_index=lambda index, path: index.loadIndex(path, load_data=True),
-            configure_index=lambda index: …
-            …
-            …
+            configure_index=lambda index: (
+                index.setQueryTimeParams({"efSearch": index_params.ef_s}) if index_params.ef_s else None
+            ),
         )
 
         user_vectors = get_csr_matrix(user_idx, vector_items, vector_ratings)
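
Both nmslib inferers now parenthesize the conditional so the lambda body reads as one expression. A standalone sketch of the same configure-then-query flow against an nmslib HNSW index (the index parameters and data here are illustrative, not from the diff):

    import nmslib
    import numpy as np

    index = nmslib.init(method="hnsw", space="cosinesimil")
    index.addDataPointBatch(np.random.rand(100, 16).astype(np.float32))
    index.createIndex({"M": 16, "efConstruction": 100})

    ef_s = 200  # plays the role of index_params.ef_s in the diff
    configure_index = lambda index: (
        index.setQueryTimeParams({"efSearch": ef_s}) if ef_s else None
    )
    configure_index(index)
    ids, distances = index.knnQuery(np.random.rand(16).astype(np.float32), k=5)
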
--- replay_rec-0.17.1/replay/models/nn/sequential/bert4rec/model.py
+++ replay_rec-0.18.0/replay/models/nn/sequential/bert4rec/model.py
@@ -1,7 +1,7 @@
 import contextlib
 import math
 from abc import ABC, abstractmethod
-from typing import Dict, Optional, Tuple, Union, cast
+from typing import Dict, Optional, Union
 
 import torch
 
@@ -115,13 +115,10 @@ class Bert4RecModel(torch.nn.Module):
         # (B x L x E)
         x = self.item_embedder(inputs, token_mask)
 
-        # (B x 1 x L x L)
-        pad_mask_for_attention = self._get_attention_mask_from_padding(pad_mask)
-
         # Running over multiple transformer blocks
         for transformer in self.transformer_blocks:
             for _ in range(self.num_passes_over_block):
-                x = transformer(x, pad_mask_for_attention)
+                x = transformer(x, pad_mask)
 
         return x
 
@@ -147,11 +144,6 @@ class Bert4RecModel(torch.nn.Module):
         """
         return self.forward_step(inputs, pad_mask, token_mask)[:, -1, :]
 
-    def _get_attention_mask_from_padding(self, pad_mask: torch.BoolTensor) -> torch.BoolTensor:
-        # (B x L) -> (B x 1 x L x L)
-        pad_mask_for_attention = pad_mask.unsqueeze(1).repeat(1, self.max_len, 1).unsqueeze(1)
-        return cast(torch.BoolTensor, pad_mask_for_attention)
-
     def _init(self) -> None:
         for _, param in self.named_parameters():
             with contextlib.suppress(ValueError):
@@ -456,7 +448,7 @@ class TransformerBlock(torch.nn.Module):
         :param dropout: Dropout rate.
         """
         super().__init__()
-        self.attention = …
+        self.attention = torch.nn.MultiheadAttention(hidden_size, attn_heads, dropout=dropout, batch_first=True)
         self.attention_dropout = torch.nn.Dropout(dropout)
         self.attention_norm = LayerNorm(hidden_size)
 
@@ -479,7 +471,8 @@ class TransformerBlock(torch.nn.Module):
         """
         # Attention + skip-connection
         x_norm = self.attention_norm(x)
-        …
+        attent_emb, _ = self.attention(x_norm, x_norm, x_norm, key_padding_mask=~mask, need_weights=False)
+        y = x + self.attention_dropout(attent_emb)
 
         # PFF + skip-connection
         z = y + self.pff_dropout(self.pff(self.pff_norm(y)))
@@ -487,106 +480,6 @@ class TransformerBlock(torch.nn.Module):
         return self.dropout(z)
 
 
-class Attention(torch.nn.Module):
-    """
-    Compute Scaled Dot Product Attention
-    """
-
-    def __init__(self, dropout: float) -> None:
-        """
-        :param dropout: Dropout rate.
-        """
-        super().__init__()
-        self.dropout = torch.nn.Dropout(p=dropout)
-
-    def forward(
-        self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor, mask: torch.BoolTensor
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
-        """
-        :param query: Query feature vector.
-        :param key: Key feature vector.
-        :param value: Value feature vector.
-        :param mask: Mask where 0 - <MASK>, 1 - otherwise.
-
-        :returns: Tuple of scaled dot product attention
-            and attention logits for each element.
-        """
-        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
-
-        scores = scores.masked_fill(mask == 0, -1e9)
-        p_attn = torch.nn.functional.softmax(scores, dim=-1)
-        p_attn = self.dropout(p_attn)
-
-        return torch.matmul(p_attn, value), p_attn
-
-
-class MultiHeadedAttention(torch.nn.Module):
-    """
-    Take in model size and number of heads.
-    """
-
-    def __init__(self, h: int, d_model: int, dropout: float = 0.1) -> None:
-        """
-        :param h: Head sizes of multi-head attention.
-        :param d_model: Embedding dimension.
-        :param dropout: Dropout rate.
-            Default: ``0.1``.
-        """
-        super().__init__()
-        assert d_model % h == 0
-
-        # We assume d_v always equals d_k
-        self.d_k = d_model // h
-        self.h = h
-
-        # 3 linear projections for Q, K, V
-        self.qkv_linear_layers = torch.nn.ModuleList([torch.nn.Linear(d_model, d_model) for _ in range(3)])
-
-        # 2 linear projections for P -> P_q, P_k
-        self.pos_linear_layers = torch.nn.ModuleList([torch.nn.Linear(d_model, d_model) for _ in range(2)])
-
-        self.output_linear = torch.nn.Linear(d_model, d_model)
-
-        self.attention = Attention(dropout)
-
-    def forward(
-        self,
-        query: torch.Tensor,
-        key: torch.Tensor,
-        value: torch.Tensor,
-        mask: torch.BoolTensor,
-    ) -> torch.Tensor:
-        """
-        :param query: Query feature vector.
-        :param key: Key feature vector.
-        :param value: Value feature vector.
-        :param mask: Mask where 0 - <MASK>, 1 - otherwise.
-
-        :returns: Attention outputs.
-        """
-        batch_size = query.size(0)
-
-        # B - batch size
-        # L - sequence length (max_len)
-        # E - embedding size for tokens fed into transformer
-        # K - max relative distance
-        # H - attention head count
-
-        # Do all the linear projections in batch from d_model => h x d_k
-        # (B x L x E) -> (B x H x L x (E / H))
-        query, key, value = [
-            layer(x).view(batch_size, -1, self.h, self.d_k).transpose(1, 2)
-            for layer, x in zip(self.qkv_linear_layers, (query, key, value))
-        ]
-
-        x, _ = self.attention(query, key, value, mask)
-
-        # Concat using a view and apply a final linear.
-        x = x.transpose(1, 2).contiguous().view(batch_size, -1, self.h * self.d_k)
-
-        return self.output_linear(x)
-
-
 class LayerNorm(torch.nn.Module):
     """
     Construct a layernorm module (See citation for details).
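
The removed Attention/MultiHeadedAttention pair is replaced by torch.nn.MultiheadAttention in batch-first mode; note that the new call inverts the mask (key_padding_mask=~mask), since the torch module treats True as a position to ignore while the model's pad_mask marks real tokens. A minimal sketch of the new call pattern with illustrative shapes:

    import torch

    batch, seq_len, hidden, heads = 2, 5, 8, 2
    x = torch.randn(batch, seq_len, hidden)
    # True marks real tokens here, hence the ~pad_mask inversion below.
    pad_mask = torch.tensor([[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]], dtype=torch.bool)

    attention = torch.nn.MultiheadAttention(hidden, heads, dropout=0.1, batch_first=True)
    attent_emb, _ = attention(x, x, x, key_padding_mask=~pad_mask, need_weights=False)
    print(attent_emb.shape)  # torch.Size([2, 5, 8])
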
--- replay_rec-0.17.1/replay/models/nn/sequential/sasrec/model.py
+++ replay_rec-0.18.0/replay/models/nn/sequential/sasrec/model.py
@@ -401,7 +401,12 @@ class SasRecLayers(torch.nn.Module):
         """
         super().__init__()
         self.attention_layers = self._layers_stacker(
-            num_blocks, …
+            num_blocks,
+            torch.nn.MultiheadAttention,
+            hidden_size,
+            num_heads,
+            dropout,
+            batch_first=True,
         )
         self.attention_layernorms = self._layers_stacker(num_blocks, torch.nn.LayerNorm, hidden_size, eps=1e-8)
         self.forward_layers = self._layers_stacker(num_blocks, SasRecPointWiseFeedForward, hidden_size, dropout)
@@ -422,11 +427,9 @@ class SasRecLayers(torch.nn.Module):
         """
         length = len(self.attention_layers)
         for i in range(length):
-            seqs = torch.transpose(seqs, 0, 1)
             query = self.attention_layernorms[i](seqs)
-            attent_emb, _ = self.attention_layers[i](query, seqs, seqs, attn_mask=attention_mask)
+            attent_emb, _ = self.attention_layers[i](query, seqs, seqs, attn_mask=attention_mask, need_weights=False)
             seqs = query + attent_emb
-            seqs = torch.transpose(seqs, 0, 1)
 
             seqs = self.forward_layernorms[i](seqs)
             seqs = self.forward_layers[i](seqs)
@@ -492,7 +495,7 @@ class SasRecPointWiseFeedForward(torch.nn.Module):
 
         :returns: Output tensors.
         """
-        outputs = self.dropout2(self.conv2(self.…
+        outputs = self.dropout2(self.conv2(self.dropout1(self.relu(self.conv1(inputs.transpose(-1, -2))))))
         outputs = outputs.transpose(-1, -2)
         outputs += inputs
 
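
The corrected point-wise feed-forward line chains Conv1d over the transposed sequence with a residual add. A standalone sketch of that block; the layer names mirror the diff, the sizes are illustrative:

    import torch

    hidden = 8
    conv1 = torch.nn.Conv1d(hidden, hidden, kernel_size=1)
    conv2 = torch.nn.Conv1d(hidden, hidden, kernel_size=1)
    relu = torch.nn.ReLU()
    dropout1, dropout2 = torch.nn.Dropout(0.1), torch.nn.Dropout(0.1)

    inputs = torch.randn(2, 5, hidden)
    # Conv1d expects (batch, channels, length), hence the two transposes.
    outputs = dropout2(conv2(dropout1(relu(conv1(inputs.transpose(-1, -2))))))
    outputs = outputs.transpose(-1, -2)
    outputs += inputs  # residual connection, as in the diff
    print(outputs.shape)  # torch.Size([2, 5, 8])
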
--- replay_rec-0.17.1/replay/preprocessing/converter.py
+++ replay_rec-0.18.0/replay/preprocessing/converter.py
@@ -102,6 +102,6 @@ class CSRConverter:
         row_count = self.row_count if self.row_count is not None else _get_max(rows_data) + 1
         col_count = self.column_count if self.column_count is not None else _get_max(cols_data) + 1
         return csr_matrix(
-            (data, (rows_data, cols_data)),
+            (data.tolist(), (rows_data.tolist(), cols_data.tolist())),
             shape=(row_count, col_count),
         )
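
CSRConverter now hands plain Python lists to csr_matrix instead of the original array-likes. A minimal sketch of the same COO-style constructor call with illustrative data:

    import numpy as np
    from scipy.sparse import csr_matrix

    data = np.array([1.0, 2.0, 3.0])
    rows_data = np.array([0, 0, 1])
    cols_data = np.array([0, 2, 1])

    matrix = csr_matrix(
        (data.tolist(), (rows_data.tolist(), cols_data.tolist())),
        shape=(2, 3),
    )
    print(matrix.toarray())
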
--- replay_rec-0.17.1/replay/preprocessing/filters.py
+++ replay_rec-0.18.0/replay/preprocessing/filters.py
@@ -1,6 +1,7 @@
 """
 Select or remove data by some criteria
 """
+
 from abc import ABC, abstractmethod
 from datetime import datetime, timedelta
 from typing import Callable, Optional, Tuple, Union
@@ -355,8 +356,8 @@ class NumInteractionsFilter(_BaseFilter):
     >>> log_pd = pd.DataFrame({"user_id": ["u1", "u2", "u2", "u3", "u3", "u3"],
     ...                        "item_id": ["i1", "i2","i3", "i1", "i2","i3"],
     ...                        "rating": [1., 0.5, 3, 1, 0, 1],
-    ...                        "timestamp": ["2020-01-01 23:59:59", "2020-02-01",
-    ...                                      "2020-02-01", "2020-01-01 00:04:15",
+    ...                        "timestamp": ["2020-01-01 23:59:59", "2020-02-01 00:00:00",
+    ...                                      "2020-02-01 00:00:01", "2020-01-01 00:04:15",
     ...                                      "2020-01-02 00:04:14", "2020-01-05 23:59:59"]},
     ...                       )
     >>> log_pd["timestamp"] = pd.to_datetime(log_pd["timestamp"], format="ISO8601")
@@ -367,7 +368,7 @@ class NumInteractionsFilter(_BaseFilter):
     +-------+-------+------+-------------------+
     |     u1|     i1|   1.0|2020-01-01 23:59:59|
     |     u2|     i2|   0.5|2020-02-01 00:00:00|
-    |     u2|     i3|   3.0|2020-02-01 00:00:00|
+    |     u2|     i3|   3.0|2020-02-01 00:00:01|
     |     u3|     i1|   1.0|2020-01-01 00:04:15|
     |     u3|     i2|   0.0|2020-01-02 00:04:14|
     |     u3|     i3|   1.0|2020-01-05 23:59:59|
@@ -393,7 +394,7 @@ class NumInteractionsFilter(_BaseFilter):
     |user_id|item_id|rating|          timestamp|
     +-------+-------+------+-------------------+
     |     u1|     i1|   1.0|2020-01-01 23:59:59|
-    |     u2|…
+    |     u2|     i3|   3.0|2020-02-01 00:00:01|
     |     u3|     i3|   1.0|2020-01-05 23:59:59|
     +-------+-------+------+-------------------+
     <BLANKLINE>
@@ -403,7 +404,7 @@ class NumInteractionsFilter(_BaseFilter):
     |user_id|item_id|rating|          timestamp|
     +-------+-------+------+-------------------+
     |     u1|     i1|   1.0|2020-01-01 23:59:59|
-    |     u2|     i3|   3.0|2020-02-01 00:00:00|
+    |     u2|     i3|   3.0|2020-02-01 00:00:01|
     |     u3|     i3|   1.0|2020-01-05 23:59:59|
     +-------+-------+------+-------------------+
     <BLANKLINE>
@@ -482,7 +483,7 @@ class NumInteractionsFilter(_BaseFilter):
 
         return (
            interactions.sort(sorting_columns, descending=descending)
-            .with_columns(pl.col(self.query_column).…
+            .with_columns(pl.col(self.query_column).cum_count().over(self.query_column).alias("temp_rank"))
             .filter(pl.col("temp_rank") <= self.num_interactions)
             .drop("temp_rank")
         )
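
The removed line above is truncated in this rendering; given that the polars pin moves from the 0.x line to ~1.0.0 in this release, it most likely used the pre-1.0 cumcount spelling that polars renamed to cum_count. A standalone sketch of the ranking trick NumInteractionsFilter relies on, with illustrative column names:

    import polars as pl

    interactions = pl.DataFrame({
        "user_id": ["u1", "u2", "u2", "u2"],
        "item_id": ["i1", "i1", "i2", "i3"],
    })

    num_interactions = 2
    filtered = (
        interactions
        # cum_count() is 1-based within each user_id window.
        .with_columns(pl.col("user_id").cum_count().over("user_id").alias("temp_rank"))
        .filter(pl.col("temp_rank") <= num_interactions)
        .drop("temp_rank")
    )
    print(filtered)
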
@@ -497,8 +498,8 @@ class EntityDaysFilter(_BaseFilter):
     >>> log_pd = pd.DataFrame({"user_id": ["u1", "u2", "u2", "u3", "u3", "u3"],
     ...                        "item_id": ["i1", "i2","i3", "i1", "i2","i3"],
     ...                        "rating": [1., 0.5, 3, 1, 0, 1],
-    ...                        "timestamp": ["2020-01-01 23:59:59", "2020-02-01",
-    ...                                      "2020-02-01", "2020-01-01 00:04:15",
+    ...                        "timestamp": ["2020-01-01 23:59:59", "2020-02-01 00:00:00",
+    ...                                      "2020-02-01 00:00:01", "2020-01-01 00:04:15",
     ...                                      "2020-01-02 00:04:14", "2020-01-05 23:59:59"]},
     ...                       )
     >>> log_pd["timestamp"] = pd.to_datetime(log_pd["timestamp"], format="ISO8601")
@@ -509,7 +510,7 @@ class EntityDaysFilter(_BaseFilter):
     +-------+-------+------+-------------------+
     |     u1|     i1|   1.0|2020-01-01 23:59:59|
     |     u2|     i2|   0.5|2020-02-01 00:00:00|
-    |     u2|     i3|   3.0|2020-02-01 00:00:00|
+    |     u2|     i3|   3.0|2020-02-01 00:00:01|
     |     u3|     i1|   1.0|2020-01-01 00:04:15|
     |     u3|     i2|   0.0|2020-01-02 00:04:14|
     |     u3|     i3|   1.0|2020-01-05 23:59:59|
@@ -524,7 +525,7 @@ class EntityDaysFilter(_BaseFilter):
     +-------+-------+------+-------------------+
     |     u1|     i1|   1.0|2020-01-01 23:59:59|
     |     u2|     i2|   0.5|2020-02-01 00:00:00|
-    |     u2|     i3|   3.0|2020-02-01 00:00:00|
+    |     u2|     i3|   3.0|2020-02-01 00:00:01|
     |     u3|     i1|   1.0|2020-01-01 00:04:15|
     |     u3|     i2|   0.0|2020-01-02 00:04:14|
     +-------+-------+------+-------------------+
@@ -539,7 +540,7 @@ class EntityDaysFilter(_BaseFilter):
     |     u1|     i1|   1.0|2020-01-01 23:59:59|
     |     u3|     i1|   1.0|2020-01-01 00:04:15|
     |     u2|     i2|   0.5|2020-02-01 00:00:00|
-    |     u2|     i3|   3.0|2020-02-01 00:00:00|
+    |     u2|     i3|   3.0|2020-02-01 00:00:01|
     +-------+-------+------+-------------------+
     <BLANKLINE>
     """
@@ -636,8 +637,8 @@ class GlobalDaysFilter(_BaseFilter):
     >>> log_pd = pd.DataFrame({"user_id": ["u1", "u2", "u2", "u3", "u3", "u3"],
     ...                        "item_id": ["i1", "i2","i3", "i1", "i2","i3"],
     ...                        "rating": [1., 0.5, 3, 1, 0, 1],
-    ...                        "timestamp": ["2020-01-01 23:59:59", "2020-02-01",
-    ...                                      "2020-02-01", "2020-01-01 00:04:15",
+    ...                        "timestamp": ["2020-01-01 23:59:59", "2020-02-01 00:00:00",
+    ...                                      "2020-02-01 00:00:01", "2020-01-01 00:04:15",
     ...                                      "2020-01-02 00:04:14", "2020-01-05 23:59:59"]},
     ...                       )
     >>> log_pd["timestamp"] = pd.to_datetime(log_pd["timestamp"], format="ISO8601")
@@ -648,7 +649,7 @@ class GlobalDaysFilter(_BaseFilter):
     +-------+-------+------+-------------------+
     |     u1|     i1|   1.0|2020-01-01 23:59:59|
     |     u2|     i2|   0.5|2020-02-01 00:00:00|
-    |     u2|     i3|   3.0|2020-02-01 00:00:00|
+    |     u2|     i3|   3.0|2020-02-01 00:00:01|
     |     u3|     i1|   1.0|2020-01-01 00:04:15|
     |     u3|     i2|   0.0|2020-01-02 00:04:14|
     |     u3|     i3|   1.0|2020-01-05 23:59:59|
@@ -670,7 +671,7 @@ class GlobalDaysFilter(_BaseFilter):
     |user_id|item_id|rating|          timestamp|
     +-------+-------+------+-------------------+
     |     u2|     i2|   0.5|2020-02-01 00:00:00|
-    |     u2|     i3|   3.0|2020-02-01 00:00:00|
+    |     u2|     i3|   3.0|2020-02-01 00:00:01|
     +-------+-------+------+-------------------+
     <BLANKLINE>
     """
@@ -738,8 +739,8 @@ class TimePeriodFilter(_BaseFilter):
     >>> log_pd = pd.DataFrame({"user_id": ["u1", "u2", "u2", "u3", "u3", "u3"],
     ...                        "item_id": ["i1", "i2","i3", "i1", "i2","i3"],
     ...                        "rating": [1., 0.5, 3, 1, 0, 1],
-    ...                        "timestamp": ["2020-01-01 23:59:59", "2020-02-01",
-    ...                                      "2020-02-01", "2020-01-01 00:04:15",
+    ...                        "timestamp": ["2020-01-01 23:59:59", "2020-02-01 00:00:00",
+    ...                                      "2020-02-01 00:00:01", "2020-01-01 00:04:15",
     ...                                      "2020-01-02 00:04:14", "2020-01-05 23:59:59"]},
     ...                       )
     >>> log_pd["timestamp"] = pd.to_datetime(log_pd["timestamp"], format="ISO8601")
@@ -750,7 +751,7 @@ class TimePeriodFilter(_BaseFilter):
     +-------+-------+------+-------------------+
     |     u1|     i1|   1.0|2020-01-01 23:59:59|
     |     u2|     i2|   0.5|2020-02-01 00:00:00|
-    |     u2|     i3|   3.0|2020-02-01 00:00:00|
+    |     u2|     i3|   3.0|2020-02-01 00:00:01|
     |     u3|     i1|   1.0|2020-01-01 00:04:15|
     |     u3|     i2|   0.0|2020-01-02 00:04:14|
     |     u3|     i3|   1.0|2020-01-05 23:59:59|