replay-rec 0.20.2__py3-none-any.whl → 0.20.3rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. replay/__init__.py +1 -1
  2. replay/data/nn/sequential_dataset.py +8 -2
  3. replay/experimental/__init__.py +0 -0
  4. replay/experimental/metrics/__init__.py +62 -0
  5. replay/experimental/metrics/base_metric.py +603 -0
  6. replay/experimental/metrics/coverage.py +97 -0
  7. replay/experimental/metrics/experiment.py +175 -0
  8. replay/experimental/metrics/hitrate.py +26 -0
  9. replay/experimental/metrics/map.py +30 -0
  10. replay/experimental/metrics/mrr.py +18 -0
  11. replay/experimental/metrics/ncis_precision.py +31 -0
  12. replay/experimental/metrics/ndcg.py +49 -0
  13. replay/experimental/metrics/precision.py +22 -0
  14. replay/experimental/metrics/recall.py +25 -0
  15. replay/experimental/metrics/rocauc.py +49 -0
  16. replay/experimental/metrics/surprisal.py +90 -0
  17. replay/experimental/metrics/unexpectedness.py +76 -0
  18. replay/experimental/models/__init__.py +50 -0
  19. replay/experimental/models/admm_slim.py +257 -0
  20. replay/experimental/models/base_neighbour_rec.py +200 -0
  21. replay/experimental/models/base_rec.py +1386 -0
  22. replay/experimental/models/base_torch_rec.py +234 -0
  23. replay/experimental/models/cql.py +454 -0
  24. replay/experimental/models/ddpg.py +932 -0
  25. replay/experimental/models/dt4rec/__init__.py +0 -0
  26. replay/experimental/models/dt4rec/dt4rec.py +189 -0
  27. replay/experimental/models/dt4rec/gpt1.py +401 -0
  28. replay/experimental/models/dt4rec/trainer.py +127 -0
  29. replay/experimental/models/dt4rec/utils.py +264 -0
  30. replay/experimental/models/extensions/spark_custom_models/__init__.py +0 -0
  31. replay/experimental/models/extensions/spark_custom_models/als_extension.py +792 -0
  32. replay/experimental/models/hierarchical_recommender.py +331 -0
  33. replay/experimental/models/implicit_wrap.py +131 -0
  34. replay/experimental/models/lightfm_wrap.py +303 -0
  35. replay/experimental/models/mult_vae.py +332 -0
  36. replay/experimental/models/neural_ts.py +986 -0
  37. replay/experimental/models/neuromf.py +406 -0
  38. replay/experimental/models/scala_als.py +293 -0
  39. replay/experimental/models/u_lin_ucb.py +115 -0
  40. replay/experimental/nn/data/__init__.py +1 -0
  41. replay/experimental/nn/data/schema_builder.py +102 -0
  42. replay/experimental/preprocessing/__init__.py +3 -0
  43. replay/experimental/preprocessing/data_preparator.py +839 -0
  44. replay/experimental/preprocessing/padder.py +229 -0
  45. replay/experimental/preprocessing/sequence_generator.py +208 -0
  46. replay/experimental/scenarios/__init__.py +1 -0
  47. replay/experimental/scenarios/obp_wrapper/__init__.py +8 -0
  48. replay/experimental/scenarios/obp_wrapper/obp_optuna_objective.py +74 -0
  49. replay/experimental/scenarios/obp_wrapper/replay_offline.py +261 -0
  50. replay/experimental/scenarios/obp_wrapper/utils.py +85 -0
  51. replay/experimental/scenarios/two_stages/__init__.py +0 -0
  52. replay/experimental/scenarios/two_stages/reranker.py +117 -0
  53. replay/experimental/scenarios/two_stages/two_stages_scenario.py +757 -0
  54. replay/experimental/utils/__init__.py +0 -0
  55. replay/experimental/utils/logger.py +24 -0
  56. replay/experimental/utils/model_handler.py +186 -0
  57. replay/experimental/utils/session_handler.py +44 -0
  58. {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/METADATA +11 -17
  59. {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/RECORD +62 -7
  60. {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/WHEEL +0 -0
  61. {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/licenses/LICENSE +0 -0
  62. {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/licenses/NOTICE +0 -0
@@ -0,0 +1,115 @@
1
+ from typing import Optional
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ from replay.experimental.models.base_rec import HybridRecommender
7
+ from replay.utils import PYSPARK_AVAILABLE, SparkDataFrame
8
+
9
+ if PYSPARK_AVAILABLE:
10
+ from replay.utils.spark_utils import convert2spark
11
+
12
+
13
class ULinUCB(HybridRecommender):
    """
    A recommender implicitly proposed by
    `Song et al <https://arxiv.org/abs/2110.09905>`_.
    Is used as the default node recommender in :class:`HierarchicalRecommender`.
    Shares all the logic with classical item-disjoint :class:`LinUCB` but is
    user-disjoint instead. May be useful in problems with fixed number of users
    and item-oriented data.
    """

    def __init__(
        self,
        alpha: float = -2.0,
    ):
        """
        :param alpha: exploration coefficient; NOTE(review): the default is
            negative, which penalizes uncertain items rather than exploring —
            presumably intentional for the hierarchical use case, confirm.
        """
        self._alpha = alpha
        super().__init__()

    @property
    def _init_args(self):
        # Constructor arguments needed to re-create this model (save/load).
        return {
            "alpha": self._alpha,
        }

    def _fit(
        self,
        log: SparkDataFrame,
        user_features: SparkDataFrame,
        item_features: SparkDataFrame,
    ) -> None:
        """
        Fit the model locally: collect Spark data to pandas/numpy and run one
        ridge-regression update per user over that user's interactions.
        """
        # prepare data
        log = log.drop("timestamp").toPandas()

        user_features = user_features.orderBy("user_idx").drop("user_idx").toPandas()
        item_features = item_features.orderBy("item_idx").drop("item_idx").toPandas()

        self._num_users, _ = user_features.shape
        self._num_items, self._num_item_features = item_features.shape

        self._init_params()

        # Hoisted out of the loop: the feature matrix is loop-invariant, and
        # the original converted it to numpy once per user.
        item_features_np = item_features.astype(float).to_numpy()

        # main fit loop
        for user_idx, user_batch in log.groupby("user_idx"):
            self._update_params(
                user_idx,
                user_batch["item_idx"].astype(int).to_numpy(),
                user_batch["relevance"].astype(float).to_numpy(),
                item_features_np,
            )

    def _predict(
        self,
        log: SparkDataFrame,  # noqa: ARG002
        k: int,
        users: SparkDataFrame,
        items: Optional[SparkDataFrame] = None,  # noqa: ARG002
        user_features: Optional[SparkDataFrame] = None,  # noqa: ARG002
        item_features: Optional[SparkDataFrame] = None,  # noqa: ARG002
        filter_seen_items: bool = True,  # noqa: ARG002
        oversample: int = 20,
    ) -> SparkDataFrame:
        """
        Return the top ``oversample * k`` items per requested user, scored by
        the cached UCB values computed during fit.

        :param oversample: multiplier over ``k`` so that downstream filtering
            of seen items still leaves at least ``k`` candidates
        """
        extended_k = oversample * k

        user_idx = users.toPandas()["user_idx"].astype(int).to_numpy()

        # Wide matrix (users x items) of UCB scores, indexed by user id.
        pd_ucb = pd.DataFrame(self.get_relevance(user_idx), index=user_idx)

        # Wide -> long: one (user_idx, item_idx, relevance) row per pair.
        pred_df = pd_ucb.stack().reset_index()
        pred_df.columns = ["user_idx", "item_idx", "relevance"]

        # Keep the `extended_k` highest-relevance items for every user.
        pred_df = (
            pred_df.sort_values(["user_idx", "relevance"], ascending=False)
            .groupby("user_idx")
            .head(extended_k)
        )

        return convert2spark(pred_df)

    def _init_params(self) -> None:
        # theta: per-user weight vectors; A/b: shared ridge-regression
        # statistics; ucb: cached per-user upper confidence bounds.
        self._theta = np.zeros((self._num_users, self._num_item_features))
        self._b = np.zeros(self._num_item_features)
        self._A = np.eye(self._num_item_features)
        self._ucb = np.zeros((self._num_users, self._num_items))

    def _update_params(
        self,
        user_idx: int,
        items_idx: np.ndarray,
        rewards: np.ndarray,
        item_features: np.ndarray,
    ) -> None:
        """
        Fold one user's interactions into the shared statistics and refresh
        that user's UCB scores.

        :param user_idx: index of the user being updated
        :param items_idx: indices of the items the user interacted with
        :param rewards: observed relevance for each interacted item
        :param item_features: full ``(num_items, num_features)`` matrix
        """
        # Slice once; the original indexed item_features[items_idx] twice.
        interacted = item_features[items_idx]
        self._A = self._A + interacted.T @ interacted
        self._b = self._b + interacted.T @ rewards

        # Invert once and reuse — the original called np.linalg.inv(self._A)
        # twice per update.
        a_inv = np.linalg.inv(self._A)
        self._theta[user_idx] = a_inv @ self._b

        # UCB = point estimate + alpha * per-item confidence width.
        self._ucb[user_idx] = self._theta[user_idx] @ item_features.T + self._alpha * np.sqrt(
            np.sum(
                item_features.T * (a_inv @ item_features.T),
                axis=0,
            )
        )

    def get_relevance(self, user_idx):
        """Return the cached UCB scores for the given user index/indices."""
        return self._ucb[user_idx]
@@ -0,0 +1 @@
1
+ from .schema_builder import TensorSchemaBuilder
@@ -0,0 +1,102 @@
1
+ from typing import Optional
2
+
3
+ from replay.data import FeatureHint, FeatureType
4
+ from replay.data.nn.schema import TensorFeatureInfo, TensorFeatureSource, TensorSchema
5
+
6
+
7
class TensorSchemaBuilder:
    """
    Builder that simplifies creating tensor schema.

    Each ``categorical``/``numerical``/``*_list`` call registers one feature
    under its name (a later call with the same name overwrites the earlier
    one); :meth:`build` assembles the registered features into a
    :class:`TensorSchema`.
    """

    def __init__(self) -> None:
        # Insertion-ordered mapping: feature name -> feature description.
        self._tensor_schema: dict[str, TensorFeatureInfo] = {}

    def _add_categorical(
        self,
        feature_type: "FeatureType",
        name: str,
        cardinality: int,
        is_seq: bool,
        feature_source: Optional[TensorFeatureSource],
        feature_hint: Optional[FeatureHint],
        embedding_dim: Optional[int],
        padding_value: int,
    ) -> "TensorSchemaBuilder":
        # Shared implementation for `categorical` and `categorical_list`,
        # which previously duplicated this body verbatim.
        # Explicit None check: a present-but-falsy source must still be
        # wrapped in a list (the original truthiness test would drop it).
        sources = None if feature_source is None else [feature_source]
        self._tensor_schema[name] = TensorFeatureInfo(
            name=name,
            feature_type=feature_type,
            is_seq=is_seq,
            feature_sources=sources,
            feature_hint=feature_hint,
            cardinality=cardinality,
            padding_value=padding_value,
            embedding_dim=embedding_dim,
        )
        return self

    def _add_numerical(
        self,
        feature_type: "FeatureType",
        name: str,
        tensor_dim: int,
        is_seq: bool,
        feature_sources: Optional[list[TensorFeatureSource]],
        feature_hint: Optional[FeatureHint],
        padding_value: int,
    ) -> "TensorSchemaBuilder":
        # Shared implementation for `numerical` and `numerical_list`.
        self._tensor_schema[name] = TensorFeatureInfo(
            name=name,
            feature_type=feature_type,
            is_seq=is_seq,
            feature_sources=feature_sources,
            feature_hint=feature_hint,
            tensor_dim=tensor_dim,
            padding_value=padding_value,
        )
        return self

    def categorical(
        self,
        name: str,
        cardinality: int,
        is_seq: bool = False,
        feature_source: Optional[TensorFeatureSource] = None,
        feature_hint: Optional[FeatureHint] = None,
        embedding_dim: Optional[int] = None,
        padding_value: int = 0,
    ) -> "TensorSchemaBuilder":
        """Register a scalar categorical feature; returns ``self`` for chaining."""
        return self._add_categorical(
            FeatureType.CATEGORICAL,
            name,
            cardinality,
            is_seq,
            feature_source,
            feature_hint,
            embedding_dim,
            padding_value,
        )

    def numerical(
        self,
        name: str,
        tensor_dim: int,
        is_seq: bool = False,
        feature_sources: Optional[list[TensorFeatureSource]] = None,
        feature_hint: Optional[FeatureHint] = None,
        padding_value: int = 0,
    ) -> "TensorSchemaBuilder":
        """Register a scalar numerical feature; returns ``self`` for chaining."""
        return self._add_numerical(
            FeatureType.NUMERICAL,
            name,
            tensor_dim,
            is_seq,
            feature_sources,
            feature_hint,
            padding_value,
        )

    def categorical_list(
        self,
        name: str,
        cardinality: int,
        is_seq: bool = False,
        feature_source: Optional[TensorFeatureSource] = None,
        feature_hint: Optional[FeatureHint] = None,
        embedding_dim: Optional[int] = None,
        padding_value: int = 0,
    ) -> "TensorSchemaBuilder":
        """Register a list-valued categorical feature; returns ``self`` for chaining."""
        return self._add_categorical(
            FeatureType.CATEGORICAL_LIST,
            name,
            cardinality,
            is_seq,
            feature_source,
            feature_hint,
            embedding_dim,
            padding_value,
        )

    def numerical_list(
        self,
        name: str,
        tensor_dim: int,
        is_seq: bool = False,
        feature_sources: Optional[list[TensorFeatureSource]] = None,
        feature_hint: Optional[FeatureHint] = None,
        padding_value: int = 0,
    ) -> "TensorSchemaBuilder":
        """Register a list-valued numerical feature; returns ``self`` for chaining."""
        return self._add_numerical(
            FeatureType.NUMERICAL_LIST,
            name,
            tensor_dim,
            is_seq,
            feature_sources,
            feature_hint,
            padding_value,
        )

    def build(self) -> TensorSchema:
        """Assemble all registered features into a :class:`TensorSchema`."""
        return TensorSchema(list(self._tensor_schema.values()))
@@ -0,0 +1,3 @@
1
+ from .data_preparator import DataPreparator, Indexer
2
+ from .padder import Padder
3
+ from .sequence_generator import SequenceGenerator