replay-rec 0.20.2__py3-none-any.whl → 0.20.3rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- replay/__init__.py +1 -1
- replay/data/nn/sequential_dataset.py +8 -2
- replay/experimental/__init__.py +0 -0
- replay/experimental/metrics/__init__.py +62 -0
- replay/experimental/metrics/base_metric.py +603 -0
- replay/experimental/metrics/coverage.py +97 -0
- replay/experimental/metrics/experiment.py +175 -0
- replay/experimental/metrics/hitrate.py +26 -0
- replay/experimental/metrics/map.py +30 -0
- replay/experimental/metrics/mrr.py +18 -0
- replay/experimental/metrics/ncis_precision.py +31 -0
- replay/experimental/metrics/ndcg.py +49 -0
- replay/experimental/metrics/precision.py +22 -0
- replay/experimental/metrics/recall.py +25 -0
- replay/experimental/metrics/rocauc.py +49 -0
- replay/experimental/metrics/surprisal.py +90 -0
- replay/experimental/metrics/unexpectedness.py +76 -0
- replay/experimental/models/__init__.py +50 -0
- replay/experimental/models/admm_slim.py +257 -0
- replay/experimental/models/base_neighbour_rec.py +200 -0
- replay/experimental/models/base_rec.py +1386 -0
- replay/experimental/models/base_torch_rec.py +234 -0
- replay/experimental/models/cql.py +454 -0
- replay/experimental/models/ddpg.py +932 -0
- replay/experimental/models/dt4rec/__init__.py +0 -0
- replay/experimental/models/dt4rec/dt4rec.py +189 -0
- replay/experimental/models/dt4rec/gpt1.py +401 -0
- replay/experimental/models/dt4rec/trainer.py +127 -0
- replay/experimental/models/dt4rec/utils.py +264 -0
- replay/experimental/models/extensions/spark_custom_models/__init__.py +0 -0
- replay/experimental/models/extensions/spark_custom_models/als_extension.py +792 -0
- replay/experimental/models/hierarchical_recommender.py +331 -0
- replay/experimental/models/implicit_wrap.py +131 -0
- replay/experimental/models/lightfm_wrap.py +303 -0
- replay/experimental/models/mult_vae.py +332 -0
- replay/experimental/models/neural_ts.py +986 -0
- replay/experimental/models/neuromf.py +406 -0
- replay/experimental/models/scala_als.py +293 -0
- replay/experimental/models/u_lin_ucb.py +115 -0
- replay/experimental/nn/data/__init__.py +1 -0
- replay/experimental/nn/data/schema_builder.py +102 -0
- replay/experimental/preprocessing/__init__.py +3 -0
- replay/experimental/preprocessing/data_preparator.py +839 -0
- replay/experimental/preprocessing/padder.py +229 -0
- replay/experimental/preprocessing/sequence_generator.py +208 -0
- replay/experimental/scenarios/__init__.py +1 -0
- replay/experimental/scenarios/obp_wrapper/__init__.py +8 -0
- replay/experimental/scenarios/obp_wrapper/obp_optuna_objective.py +74 -0
- replay/experimental/scenarios/obp_wrapper/replay_offline.py +261 -0
- replay/experimental/scenarios/obp_wrapper/utils.py +85 -0
- replay/experimental/scenarios/two_stages/__init__.py +0 -0
- replay/experimental/scenarios/two_stages/reranker.py +117 -0
- replay/experimental/scenarios/two_stages/two_stages_scenario.py +757 -0
- replay/experimental/utils/__init__.py +0 -0
- replay/experimental/utils/logger.py +24 -0
- replay/experimental/utils/model_handler.py +186 -0
- replay/experimental/utils/session_handler.py +44 -0
- {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/METADATA +11 -17
- {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/RECORD +62 -7
- {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/WHEEL +0 -0
- {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/licenses/LICENSE +0 -0
- {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from replay.experimental.models.base_rec import HybridRecommender
|
|
7
|
+
from replay.utils import PYSPARK_AVAILABLE, SparkDataFrame
|
|
8
|
+
|
|
9
|
+
if PYSPARK_AVAILABLE:
|
|
10
|
+
from replay.utils.spark_utils import convert2spark
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ULinUCB(HybridRecommender):
    """
    A recommender implicitly proposed by
    `Song et al <https://arxiv.org/abs/2110.09905>`_.
    Is used as the default node recommender in :class:`HierarchicalRecommender`.
    Shares all the logic with classical item-disjoint :class:`LinUCB` but is
    user-disjoint instead. May be useful in problems with fixed number of users
    and item-oriented data.
    """

    def __init__(
        self,
        alpha: float = -2.0,
    ):
        """
        :param alpha: exploration coefficient
        """
        self._alpha = alpha
        super().__init__()

    @property
    def _init_args(self):
        # Constructor arguments needed to re-create this model on load.
        return {
            "alpha": self._alpha,
        }

    def _fit(
        self,
        log: SparkDataFrame,
        user_features: SparkDataFrame,
        item_features: SparkDataFrame,
    ) -> None:
        """
        Fit the linear statistics and per-user weights from the interaction log.

        :param log: interactions with ``user_idx``, ``item_idx`` and
            ``relevance`` columns (``timestamp`` is dropped)
        :param user_features: one row per user; only the row count is used
        :param item_features: one row per item, keyed by ``item_idx``
        """
        # prepare data
        log = log.drop("timestamp").toPandas()

        # order by idx so that row position in the numpy matrices below
        # corresponds to the user_idx / item_idx value
        user_features = user_features.orderBy("user_idx").drop("user_idx").toPandas()
        item_features = item_features.orderBy("item_idx").drop("item_idx").toPandas()

        self._num_users, _ = user_features.shape
        self._num_items, self._num_item_features = item_features.shape

        self._init_params()

        # convert the item-feature matrix once instead of once per user
        # inside the loop (the original re-converted it every iteration)
        item_features_np = item_features.astype(float).to_numpy()

        # main fit loop: one update per user
        for user_idx, user_batch in log.groupby("user_idx"):
            self._update_params(
                user_idx,
                user_batch["item_idx"].astype(int).to_numpy(),
                user_batch["relevance"].astype(float).to_numpy(),
                item_features_np,
            )

    def _predict(
        self,
        log: SparkDataFrame,  # noqa: ARG002
        k: int,
        users: SparkDataFrame,
        items: Optional[SparkDataFrame] = None,  # noqa: ARG002
        user_features: Optional[SparkDataFrame] = None,  # noqa: ARG002
        item_features: Optional[SparkDataFrame] = None,  # noqa: ARG002
        filter_seen_items: bool = True,  # noqa: ARG002
        oversample: int = 20,
    ) -> SparkDataFrame:
        """
        Return the ``oversample * k`` highest cached UCB scores per requested
        user as a Spark dataframe with ``user_idx``/``item_idx``/``relevance``
        columns.

        Extra candidates are returned so callers can still keep ``k`` items
        after downstream filtering of seen ones.
        """
        extended_k = oversample * k

        user_idx = users.toPandas()["user_idx"].astype(int).to_numpy()

        # wide (users x items) score matrix -> long (user, item, score) frame
        pd_ucb = pd.DataFrame(self.get_relevance(user_idx), index=user_idx)

        pred_df = pd_ucb.stack().reset_index()
        pred_df.columns = ["user_idx", "item_idx", "relevance"]

        pred_df = pred_df.sort_values(["user_idx", "relevance"], ascending=False).groupby("user_idx").head(extended_k)

        return convert2spark(pred_df)

    def _init_params(self) -> None:
        # theta: per-user weight vectors; A / b: ridge-regression statistics
        # shared by all users; ucb: cached per-user scores for every item
        self._theta = np.zeros((self._num_users, self._num_item_features))
        self._b = np.zeros(self._num_item_features)
        self._A = np.eye(self._num_item_features)
        self._ucb = np.zeros((self._num_users, self._num_items))

    def _update_params(
        self,
        user_idx: int,
        items_idx: np.ndarray,
        rewards: np.ndarray,
        item_features: np.ndarray,
    ) -> None:
        """
        Absorb one user's interactions into ``A``/``b`` and refresh that
        user's weight vector and UCB scores.

        NOTE(review): ``A`` and ``b`` are shared across users while ``theta``
        and ``ucb`` are stored per user — confirm this matches the intended
        user-disjoint formulation.
        """
        observed = item_features[items_idx]
        self._A = self._A + observed.T @ observed
        self._b = self._b + observed.T @ rewards

        # invert A once per update; the original inverted it twice per call
        a_inv = np.linalg.inv(self._A)
        self._theta[user_idx] = a_inv @ self._b

        self._ucb[user_idx] = self._theta[user_idx] @ item_features.T + self._alpha * np.sqrt(
            np.sum(
                item_features.T * (a_inv @ item_features.T),
                axis=0,
            )
        )

    def get_relevance(self, user_idx):
        """Return cached UCB scores for the given user index or index array."""
        return self._ucb[user_idx]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .schema_builder import TensorSchemaBuilder
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from replay.data import FeatureHint, FeatureType
|
|
4
|
+
from replay.data.nn.schema import TensorFeatureInfo, TensorFeatureSource, TensorSchema
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TensorSchemaBuilder:
    """
    Builder that simplifies creating tensor schema
    """

    def __init__(self) -> None:
        # Features keyed by name; a repeated name replaces the earlier entry
        # while insertion order is preserved.
        self._tensor_schema: dict[str, TensorFeatureInfo] = {}

    def _put(self, name: str, info: TensorFeatureInfo) -> "TensorSchemaBuilder":
        # Register a feature under its name and return self for chaining.
        self._tensor_schema[name] = info
        return self

    def categorical(
        self,
        name: str,
        cardinality: int,
        is_seq: bool = False,
        feature_source: Optional[TensorFeatureSource] = None,
        feature_hint: Optional[FeatureHint] = None,
        embedding_dim: Optional[int] = None,
        padding_value: int = 0,
    ) -> "TensorSchemaBuilder":
        """Register a categorical feature and return self for chaining."""
        sources = [feature_source] if feature_source else None
        info = TensorFeatureInfo(
            name=name,
            feature_type=FeatureType.CATEGORICAL,
            is_seq=is_seq,
            feature_sources=sources,
            feature_hint=feature_hint,
            cardinality=cardinality,
            padding_value=padding_value,
            embedding_dim=embedding_dim,
        )
        return self._put(name, info)

    def numerical(
        self,
        name: str,
        tensor_dim: int,
        is_seq: bool = False,
        feature_sources: Optional[list[TensorFeatureSource]] = None,
        feature_hint: Optional[FeatureHint] = None,
        padding_value: int = 0,
    ) -> "TensorSchemaBuilder":
        """Register a numerical feature and return self for chaining."""
        info = TensorFeatureInfo(
            name=name,
            feature_type=FeatureType.NUMERICAL,
            is_seq=is_seq,
            feature_sources=feature_sources,
            feature_hint=feature_hint,
            tensor_dim=tensor_dim,
            padding_value=padding_value,
        )
        return self._put(name, info)

    def categorical_list(
        self,
        name: str,
        cardinality: int,
        is_seq: bool = False,
        feature_source: Optional[TensorFeatureSource] = None,
        feature_hint: Optional[FeatureHint] = None,
        embedding_dim: Optional[int] = None,
        padding_value: int = 0,
    ) -> "TensorSchemaBuilder":
        """Register a categorical-list feature and return self for chaining."""
        sources = [feature_source] if feature_source else None
        info = TensorFeatureInfo(
            name=name,
            feature_type=FeatureType.CATEGORICAL_LIST,
            is_seq=is_seq,
            feature_sources=sources,
            feature_hint=feature_hint,
            cardinality=cardinality,
            padding_value=padding_value,
            embedding_dim=embedding_dim,
        )
        return self._put(name, info)

    def numerical_list(
        self,
        name: str,
        tensor_dim: int,
        is_seq: bool = False,
        feature_sources: Optional[list[TensorFeatureSource]] = None,
        feature_hint: Optional[FeatureHint] = None,
        padding_value: int = 0,
    ) -> "TensorSchemaBuilder":
        """Register a numerical-list feature and return self for chaining."""
        info = TensorFeatureInfo(
            name=name,
            feature_type=FeatureType.NUMERICAL_LIST,
            is_seq=is_seq,
            feature_sources=feature_sources,
            feature_hint=feature_hint,
            tensor_dim=tensor_dim,
            padding_value=padding_value,
        )
        return self._put(name, info)

    def build(self) -> TensorSchema:
        """Assemble all registered features into a :class:`TensorSchema`."""
        features = list(self._tensor_schema.values())
        return TensorSchema(features)
|