replay-rec: replay_rec-0.20.1rc0-py3-none-any.whl → replay_rec-0.20.2-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry and is provided for informational purposes only.
- replay/__init__.py +1 -1
- {replay_rec-0.20.1rc0.dist-info → replay_rec-0.20.2.dist-info}/METADATA +18 -12
- {replay_rec-0.20.1rc0.dist-info → replay_rec-0.20.2.dist-info}/RECORD +6 -61
- replay/experimental/__init__.py +0 -0
- replay/experimental/metrics/__init__.py +0 -62
- replay/experimental/metrics/base_metric.py +0 -603
- replay/experimental/metrics/coverage.py +0 -97
- replay/experimental/metrics/experiment.py +0 -175
- replay/experimental/metrics/hitrate.py +0 -26
- replay/experimental/metrics/map.py +0 -30
- replay/experimental/metrics/mrr.py +0 -18
- replay/experimental/metrics/ncis_precision.py +0 -31
- replay/experimental/metrics/ndcg.py +0 -49
- replay/experimental/metrics/precision.py +0 -22
- replay/experimental/metrics/recall.py +0 -25
- replay/experimental/metrics/rocauc.py +0 -49
- replay/experimental/metrics/surprisal.py +0 -90
- replay/experimental/metrics/unexpectedness.py +0 -76
- replay/experimental/models/__init__.py +0 -50
- replay/experimental/models/admm_slim.py +0 -257
- replay/experimental/models/base_neighbour_rec.py +0 -200
- replay/experimental/models/base_rec.py +0 -1386
- replay/experimental/models/base_torch_rec.py +0 -234
- replay/experimental/models/cql.py +0 -454
- replay/experimental/models/ddpg.py +0 -932
- replay/experimental/models/dt4rec/__init__.py +0 -0
- replay/experimental/models/dt4rec/dt4rec.py +0 -189
- replay/experimental/models/dt4rec/gpt1.py +0 -401
- replay/experimental/models/dt4rec/trainer.py +0 -127
- replay/experimental/models/dt4rec/utils.py +0 -264
- replay/experimental/models/extensions/spark_custom_models/__init__.py +0 -0
- replay/experimental/models/extensions/spark_custom_models/als_extension.py +0 -792
- replay/experimental/models/hierarchical_recommender.py +0 -331
- replay/experimental/models/implicit_wrap.py +0 -131
- replay/experimental/models/lightfm_wrap.py +0 -303
- replay/experimental/models/mult_vae.py +0 -332
- replay/experimental/models/neural_ts.py +0 -986
- replay/experimental/models/neuromf.py +0 -406
- replay/experimental/models/scala_als.py +0 -293
- replay/experimental/models/u_lin_ucb.py +0 -115
- replay/experimental/nn/data/__init__.py +0 -1
- replay/experimental/nn/data/schema_builder.py +0 -102
- replay/experimental/preprocessing/__init__.py +0 -3
- replay/experimental/preprocessing/data_preparator.py +0 -839
- replay/experimental/preprocessing/padder.py +0 -229
- replay/experimental/preprocessing/sequence_generator.py +0 -208
- replay/experimental/scenarios/__init__.py +0 -1
- replay/experimental/scenarios/obp_wrapper/__init__.py +0 -8
- replay/experimental/scenarios/obp_wrapper/obp_optuna_objective.py +0 -74
- replay/experimental/scenarios/obp_wrapper/replay_offline.py +0 -261
- replay/experimental/scenarios/obp_wrapper/utils.py +0 -85
- replay/experimental/scenarios/two_stages/__init__.py +0 -0
- replay/experimental/scenarios/two_stages/reranker.py +0 -117
- replay/experimental/scenarios/two_stages/two_stages_scenario.py +0 -757
- replay/experimental/utils/__init__.py +0 -0
- replay/experimental/utils/logger.py +0 -24
- replay/experimental/utils/model_handler.py +0 -186
- replay/experimental/utils/session_handler.py +0 -44
- {replay_rec-0.20.1rc0.dist-info → replay_rec-0.20.2.dist-info}/WHEEL +0 -0
- {replay_rec-0.20.1rc0.dist-info → replay_rec-0.20.2.dist-info}/licenses/LICENSE +0 -0
- {replay_rec-0.20.1rc0.dist-info → replay_rec-0.20.2.dist-info}/licenses/NOTICE +0 -0
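
Taken together, the listing shows that the 0.20.2 wheel drops the entire replay.experimental subtree (metrics, models, nn data helpers, preprocessing, scenarios and utils); apart from that, only replay/__init__.py and the packaging metadata (METADATA, RECORD) change. As a rough orientation only, and assuming the subpackage is removed from the wheel entirely rather than relocated, code that imported these experimental modules would now fail against 0.20.2:

    # Sketch, not taken from the package documentation: importing a module that is
    # no longer shipped in the 0.20.2 wheel raises ModuleNotFoundError.
    try:
        from replay.experimental.metrics import Experiment
    except ModuleNotFoundError:
        print("replay.experimental is not part of the replay-rec 0.20.2 wheel")

Projects that depend on these modules would need to stay on an earlier release (for example the 0.20.1rc0 wheel on the left side of this diff) or take them from the project's source repository.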

replay/experimental/metrics/experiment.py
@@ -1,175 +0,0 @@
-from typing import Any, Optional
-
-from replay.utils import DataFrameLike, IntOrList, NumType, PandasDataFrame
-from replay.utils.spark_utils import convert2spark
-
-from .base_metric import Metric, NCISMetric, RecOnlyMetric, get_enriched_recommendations
-
-
-class Experiment:
-    """
-    This class calculates and stores metric values.
-    Initialize it with test data and a dictionary mapping metrics to their depth cut-offs.
-
-    Results are available with ``pandas_df`` attribute.
-
-    Example:
-
-    >>> import pandas as pd
-    >>> from replay.experimental.metrics import Coverage, NDCG, Precision, Surprisal
-    >>> log = pd.DataFrame({"user_idx": [2, 2, 2, 1], "item_idx": [1, 2, 3, 3], "relevance": [5, 5, 5, 5]})
-    >>> test = pd.DataFrame({"user_idx": [1, 1, 1], "item_idx": [1, 2, 3], "relevance": [5, 3, 4]})
-    >>> pred = pd.DataFrame({"user_idx": [1, 1, 1], "item_idx": [4, 1, 3], "relevance": [5, 4, 5]})
-    >>> recs = pd.DataFrame({"user_idx": [1, 1, 1], "item_idx": [1, 4, 5], "relevance": [5, 4, 5]})
-    >>> ex = Experiment(test, {NDCG(): [2, 3], Surprisal(log): 3})
-    >>> ex.add_result("baseline", recs)
-    >>> ex.add_result("baseline_gt_users", recs, ground_truth_users=pd.DataFrame({"user_idx": [1, 3]}))
-    >>> ex.add_result("model", pred)
-    >>> ex.results
-                         NDCG@2    NDCG@3  Surprisal@3
-    baseline           0.386853  0.296082     1.000000
-    baseline_gt_users  0.193426  0.148041     0.500000
-    model              0.386853  0.530721     0.666667
-    >>> ex.compare("baseline")
-                       NDCG@2  NDCG@3 Surprisal@3
-    baseline                –       –           –
-    baseline_gt_users  -50.0%  -50.0%      -50.0%
-    model                0.0%  79.25%     -33.33%
-    >>> ex = Experiment(test, {Precision(): [3]}, calc_median=True, calc_conf_interval=0.95)
-    >>> ex.add_result("baseline", recs)
-    >>> ex.add_result("model", pred)
-    >>> ex.results
-              Precision@3  Precision@3_median  Precision@3_0.95_conf_interval
-    baseline     0.333333            0.333333                             0.0
-    model        0.666667            0.666667                             0.0
-    >>> ex = Experiment(test, {Coverage(log): 3}, calc_median=True, calc_conf_interval=0.95)
-    >>> ex.add_result("baseline", recs)
-    >>> ex.add_result("model", pred)
-    >>> ex.results
-              Coverage@3  Coverage@3_median  Coverage@3_0.95_conf_interval
-    baseline         1.0                1.0                            0.0
-    model            1.0                1.0                            0.0
-    """
-
-    def __init__(
-        self,
-        test: Any,
-        metrics: dict[Metric, IntOrList],
-        calc_median: bool = False,
-        calc_conf_interval: Optional[float] = None,
-    ):
-        """
-        :param test: test DataFrame
-        :param metrics: Dictionary of metrics to calculate.
-            Key -- metric, value -- ``int`` or a list of ints.
-        :param calc_median: flag to calculate median value across users
-        :param calc_conf_interval: quantile value for the calculation of the confidence interval.
-            Resulting value is the half of confidence interval.
-        """
-        self.test = convert2spark(test)
-        self.results = PandasDataFrame()
-        self.metrics = metrics
-        self.calc_median = calc_median
-        self.calc_conf_interval = calc_conf_interval
-
-    def add_result(
-        self,
-        name: str,
-        pred: DataFrameLike,
-        ground_truth_users: Optional[DataFrameLike] = None,
-    ) -> None:
-        """
-        Calculate metrics for predictions
-
-        :param name: name of the run to store in the resulting DataFrame
-        :param pred: model recommendations
-        :param ground_truth_users: list of users to consider in metric calculation.
-            if None, only the users from ground_truth are considered.
-        """
-        max_k = 0
-        for current_k in self.metrics.values():
-            max_k = max((*current_k, max_k) if isinstance(current_k, list) else (current_k, max_k))
-
-        recs = get_enriched_recommendations(pred, self.test, max_k, ground_truth_users).cache()
-        for metric, k_list in sorted(self.metrics.items(), key=lambda x: str(x[0])):
-            enriched = None
-            if isinstance(metric, (RecOnlyMetric, NCISMetric)):
-                enriched = metric._get_enriched_recommendations(pred, self.test, max_k, ground_truth_users)
-            values, median, conf_interval = self._calculate(metric, enriched or recs, k_list)
-
-            if isinstance(k_list, int):
-                self._add_metric(
-                    name,
-                    metric,
-                    k_list,
-                    values,
-                    median,
-                    conf_interval,
-                )
-            else:
-                for k, val in sorted(values.items(), key=lambda x: x[0]):
-                    self._add_metric(
-                        name,
-                        metric,
-                        k,
-                        val,
-                        None if median is None else median[k],
-                        None if conf_interval is None else conf_interval[k],
-                    )
-        recs.unpersist()
-
-    def _calculate(self, metric, enriched, k_list):
-        median = None
-        conf_interval = None
-        values = metric._mean(enriched, k_list)
-        if self.calc_median:
-            median = metric._median(enriched, k_list)
-        if self.calc_conf_interval is not None:
-            conf_interval = metric._conf_interval(enriched, k_list, self.calc_conf_interval)
-        return values, median, conf_interval
-
-    def _add_metric(
-        self,
-        name: str,
-        metric: Metric,
-        k: int,
-        value: NumType,
-        median: Optional[NumType],
-        conf_interval: Optional[NumType],
-    ):
-        """
-        Add metric for a specific k
-
-        :param name: name to save results
-        :param metric: metric object
-        :param k: length of the recommendation list
-        :param value: metric value
-        :param median: median value
-        :param conf_interval: confidence interval value
-        """
-        self.results.at[name, f"{metric}@{k}"] = value
-        if median is not None:
-            self.results.at[name, f"{metric}@{k}_median"] = median
-        if conf_interval is not None:
-            self.results.at[name, f"{metric}@{k}_{self.calc_conf_interval}_conf_interval"] = conf_interval
-
-    def compare(self, name: str) -> PandasDataFrame:
-        """
-        Show results as a percentage difference to record ``name``.
-
-        :param name: name of the baseline record
-        :return: results table in a percentage format
-        """
-        if name not in self.results.index:
-            msg = f"No results for model {name}"
-            raise ValueError(msg)
-        columns = [column for column in self.results.columns if column[-1].isdigit()]
-        data_frame = self.results[columns].copy()
-        baseline = data_frame.loc[name]
-        for idx in data_frame.index:
-            if idx != name:
-                diff = data_frame.loc[idx] / baseline - 1
-                data_frame.loc[idx] = [str(round(v * 100, 2)) + "%" for v in diff]
-            else:
-                data_frame.loc[name] = ["–"] * len(baseline)
-        return data_frame

replay/experimental/metrics/hitrate.py
@@ -1,26 +0,0 @@
-from .base_metric import Metric
-
-
-class HitRate(Metric):
-    """
-    Percentage of users that have at least one
-    correctly recommended item among top-k.
-
-    .. math::
-        HitRate@K(i) = \\max_{j \\in [1..K]}\\mathbb{1}_{r_{ij}}
-
-    .. math::
-        HitRate@K = \\frac {\\sum_{i=1}^{N}HitRate@K(i)}{N}
-
-    :math:`\\mathbb{1}_{r_{ij}}` -- indicator function stating that user :math:`i` interacted with item :math:`j`
-
-    """
-
-    _scala_udf_name = "getHitRateMetricValue"
-
-    @staticmethod
-    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
-        for i in pred[:k]:
-            if i in ground_truth:
-                return 1
-        return 0

replay/experimental/metrics/map.py
@@ -1,30 +0,0 @@
-from .base_metric import Metric
-
-
-class MAP(Metric):
-    """
-    Mean Average Precision -- average the ``Precision`` at relevant positions for each user,
-    and then calculate the mean across all users.
-
-    .. math::
-        &AP@K(i) = \\frac 1K \\sum_{j=1}^{K}\\mathbb{1}_{r_{ij}}Precision@j(i)
-
-        &MAP@K = \\frac {\\sum_{i=1}^{N}AP@K(i)}{N}
-
-    :math:`\\mathbb{1}_{r_{ij}}` -- indicator function showing if user :math:`i` interacted with item :math:`j`
-    """
-
-    _scala_udf_name = "getMAPMetricValue"
-
-    @staticmethod
-    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
-        length = min(k, len(pred))
-        if len(ground_truth) == 0 or len(pred) == 0:
-            return 0
-        tp_cum = 0
-        result = 0
-        for i in range(length):
-            if pred[i] in ground_truth:
-                tp_cum += 1
-                result += tp_cum / (i + 1)
-        return result / k
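
For reference, the average-precision loop removed above can be checked by hand; a minimal standalone sketch of the same calculation (the example values are illustrative, not taken from the package's tests):

    >>> pred, ground_truth, k = [1, 4, 3], {1, 2, 3}, 3
    >>> hits, ap = 0, 0.0
    >>> for i, item in enumerate(pred[:k]):
    ...     if item in ground_truth:
    ...         hits += 1
    ...         ap += hits / (i + 1)
    >>> round(ap / k, 4)
    0.5556

Relevant items at ranks 1 and 3 contribute precisions 1/1 and 2/3, and the sum is divided by k rather than by the number of hits, matching the removed implementation.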

replay/experimental/metrics/mrr.py
@@ -1,18 +0,0 @@
-from .base_metric import Metric
-
-
-class MRR(Metric):
-    """
-    Mean Reciprocal Rank --
-    Reciprocal Rank is the inverse position of the first relevant item among top-k recommendations,
-    :math:`\\frac {1}{rank_i}`. This value is averaged by all users.
-    """
-
-    _scala_udf_name = "getMRRMetricValue"
-
-    @staticmethod
-    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
-        for i in range(min(k, len(pred))):
-            if pred[i] in ground_truth:
-                return 1 / (1 + i)
-        return 0

replay/experimental/metrics/ncis_precision.py
@@ -1,31 +0,0 @@
-import numpy as np
-
-from .base_metric import NCISMetric
-
-
-class NCISPrecision(NCISMetric):
-    """
-    Share of relevant items among top ``K`` recommendations with NCIS weighting.
-
-    .. math::
-        Precision@K(i) = \\frac {\\sum_{j=1}^{K}\\mathbb{1}_{r_{ij} w_{ij}}}{\\sum_{j=1}^{K} w_{ij}}
-
-    .. math::
-        Precision@K = \\frac {\\sum_{i=1}^{N}Precision@K(i)}{N}
-
-    :math:`\\mathbb{1}_{r_{ij}}` -- indicator function
-    showing that user :math:`i` interacted with item :math:`j`
-    :math:`w_{ij}` -- NCIS weight, calculated as ratio of current policy score on previous
-    policy score with clipping and optional activation over policy scores (relevance).
-    Source: arxiv.org/abs/1801.07030
-    """
-
-    _scala_udf_name = "getNCISPrecisionMetricValue"
-
-    @staticmethod
-    def _get_metric_value_by_user(k, *args):
-        pred, ground_truth, pred_weights = args
-        if len(pred) == 0 or len(ground_truth) == 0:
-            return 0
-        mask = np.isin(pred[:k], ground_truth)
-        return sum(np.array(pred_weights)[mask]) / sum(pred_weights[:k])
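
The weighting above only changes the numerator and denominator of ordinary precision; a small standalone sketch of the per-user formula with made-up weights (the weight values are illustrative and not produced by any replay API):

    >>> import numpy as np
    >>> pred, ground_truth, k = [1, 4, 5], [1, 2, 3], 3
    >>> weights = np.array([0.5, 1.0, 2.0])  # hypothetical clipped policy-ratio weights
    >>> mask = np.isin(pred[:k], ground_truth)
    >>> round(float(weights[mask].sum() / weights[:k].sum()), 4)
    0.1429

Only the first recommendation is relevant, so its weight 0.5 is divided by the total weight 3.5 of the top-3 list.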

replay/experimental/metrics/ndcg.py
@@ -1,49 +0,0 @@
-import math
-
-from .base_metric import Metric
-
-
-class NDCG(Metric):
-    """
-    Normalized Discounted Cumulative Gain is a metric
-    that takes into account positions of relevant items.
-
-    This is the binary version, it takes into account
-    whether the item was consumed or not, relevance value is ignored.
-
-    .. math::
-        DCG@K(i) = \\sum_{j=1}^{K}\\frac{\\mathbb{1}_{r_{ij}}}{\\log_2 (j+1)}
-
-
-    :math:`\\mathbb{1}_{r_{ij}}` -- indicator function showing that user :math:`i` interacted with item :math:`j`
-
-    To get from :math:`DCG` to :math:`nDCG` we calculate the biggest possible value of `DCG`
-    for user :math:`i` and recommendation length :math:`K`.
-
-    .. math::
-        IDCG@K(i) = max(DCG@K(i)) = \\sum_{j=1}^{K}\\frac{\\mathbb{1}_{j\\le|Rel_i|}}{\\log_2 (j+1)}
-
-    .. math::
-        nDCG@K(i) = \\frac {DCG@K(i)}{IDCG@K(i)}
-
-    :math:`|Rel_i|` -- number of relevant items for user :math:`i`
-
-    Metric is averaged by users.
-
-    .. math::
-        nDCG@K = \\frac {\\sum_{i=1}^{N}nDCG@K(i)}{N}
-    """
-
-    _scala_udf_name = "getNDCGMetricValue"
-
-    @staticmethod
-    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
-        if len(pred) == 0 or len(ground_truth) == 0:
-            return 0.0
-        pred_len = min(k, len(pred))
-        ground_truth_len = min(k, len(ground_truth))
-        denom = [1 / math.log2(i + 2) for i in range(k)]
-        dcg = sum(denom[i] for i in range(pred_len) if pred[i] in ground_truth)
-        idcg = sum(denom[:ground_truth_len])
-
-        return dcg / idcg
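
The binary nDCG above reduces to a short per-user computation; a minimal standalone sketch of the same logic (example values are illustrative):

    >>> import math
    >>> pred, ground_truth, k = [1, 4, 5], {1, 2, 3}, 3
    >>> denom = [1 / math.log2(i + 2) for i in range(k)]
    >>> dcg = sum(denom[i] for i in range(min(k, len(pred))) if pred[i] in ground_truth)
    >>> idcg = sum(denom[:min(k, len(ground_truth))])
    >>> round(dcg / idcg, 4)
    0.4693

Only the item at rank 1 is relevant, so DCG is 1.0, while the ideal ordering of three relevant items would score 1 + 1/log2(3) + 1/2 ≈ 2.131.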

replay/experimental/metrics/precision.py
@@ -1,22 +0,0 @@
-from .base_metric import Metric
-
-
-class Precision(Metric):
-    """
-    Mean percentage of relevant items among top ``K`` recommendations.
-
-    .. math::
-        Precision@K(i) = \\frac {\\sum_{j=1}^{K}\\mathbb{1}_{r_{ij}}}{K}
-
-    .. math::
-        Precision@K = \\frac {\\sum_{i=1}^{N}Precision@K(i)}{N}
-
-    :math:`\\mathbb{1}_{r_{ij}}` -- indicator function showing that user :math:`i` interacted with item :math:`j`"""
-
-    _scala_udf_name = "getPrecisionMetricValue"
-
-    @staticmethod
-    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
-        if len(pred) == 0:
-            return 0
-        return len(set(pred[:k]) & set(ground_truth)) / k

replay/experimental/metrics/recall.py
@@ -1,25 +0,0 @@
-from .base_metric import Metric
-
-
-class Recall(Metric):
-    """
-    Mean percentage of relevant items, that was shown among top ``K`` recommendations.
-
-    .. math::
-        Recall@K(i) = \\frac {\\sum_{j=1}^{K}\\mathbb{1}_{r_{ij}}}{|Rel_i|}
-
-    .. math::
-        Recall@K = \\frac {\\sum_{i=1}^{N}Recall@K(i)}{N}
-
-    :math:`\\mathbb{1}_{r_{ij}}` -- indicator function showing that user :math:`i` interacted with item :math:`j`
-
-    :math:`|Rel_i|` -- the number of relevant items for user :math:`i`
-    """
-
-    _scala_udf_name = "getRecallMetricValue"
-
-    @staticmethod
-    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
-        if len(ground_truth) == 0:
-            return 0.0
-        return len(set(pred[:k]) & set(ground_truth)) / len(ground_truth)

replay/experimental/metrics/rocauc.py
@@ -1,49 +0,0 @@
-from .base_metric import Metric
-
-
-class RocAuc(Metric):
-    """
-    Receiver Operating Characteristic/Area Under the Curve is the aggregated performance measure,
-    that depends only on the order of recommended items.
-    It can be interpreted as the fraction of object pairs (object of class 1, object of class 0)
-    that were correctly ordered by the model.
-    The bigger the value of AUC, the better the classification model.
-
-    .. math::
-        ROCAUC@K(i) = \\frac {\\sum_{s=1}^{K}\\sum_{t=1}^{K}
-        \\mathbb{1}_{r_{si}<r_{ti}}
-        \\mathbb{1}_{gt_{si}<gt_{ti}}}
-        {\\sum_{s=1}^{K}\\sum_{t=1}^{K} \\mathbb{1}_{gt_{si}<gt_{tj}}}
-
-    :math:`\\mathbb{1}_{r_{si}<r_{ti}}` -- indicator function showing that recommendation score for
-    user :math:`i` for item :math:`s` is bigger than for item :math:`t`
-
-    :math:`\\mathbb{1}_{gt_{si}<gt_{ti}}` -- indicator function showing that
-    user :math:`i` values item :math:`s` more than item :math:`t`.
-
-    Metric is averaged by all users.
-
-    .. math::
-        ROCAUC@K = \\frac {\\sum_{i=1}^{N}ROCAUC@K(i)}{N}
-    """
-
-    _scala_udf_name = "getRocAucMetricValue"
-
-    @staticmethod
-    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
-        length = min(k, len(pred))
-        if len(ground_truth) == 0 or len(pred) == 0:
-            return 0
-
-        fp_cur = 0
-        fp_cum = 0
-        for item in pred[:length]:
-            if item in ground_truth:
-                fp_cum += fp_cur
-            else:
-                fp_cur += 1
-        if fp_cur == length:
-            return 0
-        if fp_cum == 0:
-            return 1
-        return 1 - fp_cum / (fp_cur * (length - fp_cur))
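
The fp_cur/fp_cum bookkeeping above counts, for every relevant item, how many irrelevant items were ranked above it; dividing by the number of (relevant, irrelevant) pairs gives the pairwise AUC. A small standalone check of the main branch (the edge cases with no hits or no misses are handled separately in the removed code; example values are illustrative):

    >>> pred, ground_truth, k = [4, 1, 5], {1}, 3
    >>> fp_cur = fp_cum = 0
    >>> for item in pred[:k]:
    ...     if item in ground_truth:
    ...         fp_cum += fp_cur  # irrelevant items ranked above this hit
    ...     else:
    ...         fp_cur += 1
    >>> 1 - fp_cum / (fp_cur * (k - fp_cur))
    0.5

One of the two (relevant, irrelevant) pairs is ordered correctly, so ROCAUC@3 is 0.5.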

replay/experimental/metrics/surprisal.py
@@ -1,90 +0,0 @@
-from typing import Optional
-
-import numpy as np
-
-from replay.utils import PYSPARK_AVAILABLE, DataFrameLike, SparkDataFrame
-from replay.utils.spark_utils import convert2spark, get_top_k_recs
-
-from .base_metric import RecOnlyMetric, fill_na_with_empty_array, filter_sort
-
-if PYSPARK_AVAILABLE:
-    from pyspark.sql import (
-        functions as sf,
-        types as st,
-    )
-
-
-class Surprisal(RecOnlyMetric):
-    """
-    Measures how many surprising rare items are present in recommendations.
-
-    .. math::
-        \\textit{Self-Information}(j)= -\\log_2 \\frac {u_j}{N}
-
-    :math:`u_j` -- number of users that interacted with item :math:`j`.
-    Cold items are treated as if they were rated by 1 user.
-    That is, if they appear in recommendations it will be completely unexpected.
-
-    Metric is normalized.
-
-    Surprisal for item :math:`j` is
-
-    .. math::
-        Surprisal(j)= \\frac {\\textit{Self-Information}(j)}{log_2 N}
-
-    Recommendation list surprisal is the average surprisal of items in it.
-
-    .. math::
-        Surprisal@K(i) = \\frac {\\sum_{j=1}^{K}Surprisal(j)} {K}
-
-    Final metric is averaged by users.
-
-    .. math::
-        Surprisal@K = \\frac {\\sum_{i=1}^{N}Surprisal@K(i)}{N}
-    """
-
-    _scala_udf_name = "getSurprisalMetricValue"
-
-    def __init__(self, log: DataFrameLike, use_scala_udf: bool = False):
-        """
-        Here we calculate self-information for each item
-
-        :param log: historical data
-        """
-        self._use_scala_udf = use_scala_udf
-        self.log = convert2spark(log)
-        n_users = self.log.select("user_idx").distinct().count()
-        self.item_weights = self.log.groupby("item_idx").agg(
-            (sf.log2(n_users / sf.countDistinct("user_idx")) / np.log2(n_users)).alias("rec_weight")
-        )
-
-    @staticmethod
-    def _get_metric_value_by_user(k, *args):
-        weigths = args[0]
-        return sum(weigths[:k]) / k
-
-    def _get_enriched_recommendations(
-        self,
-        recommendations: SparkDataFrame,
-        ground_truth: SparkDataFrame,  # noqa: ARG002
-        max_k: int,
-        ground_truth_users: Optional[DataFrameLike] = None,
-    ) -> SparkDataFrame:
-        recommendations = convert2spark(recommendations)
-        ground_truth_users = convert2spark(ground_truth_users)
-        recommendations = get_top_k_recs(recommendations, max_k)
-
-        recommendations = recommendations.join(self.item_weights, on="item_idx", how="left").fillna(1.0)
-        recommendations = filter_sort(recommendations, "rec_weight")
-        recommendations = recommendations.select("user_idx", sf.col("rec_weight")).withColumn(
-            "rec_weight",
-            sf.col("rec_weight").cast(st.ArrayType(st.DoubleType(), True)),
-        )
-        if ground_truth_users is not None:
-            recommendations = fill_na_with_empty_array(
-                recommendations.join(ground_truth_users, on="user_idx", how="right"),
-                "rec_weight",
-                self.item_weights.schema["rec_weight"].dataType,
-            )
-
-        return recommendations
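
The rec_weight column built in __init__ above is just the normalized self-information of each item; a standalone numeric sketch of that weight and of Surprisal@K for one user (values are illustrative):

    >>> import math
    >>> n_users = 4
    >>> def rec_weight(n_users_of_item):
    ...     return math.log2(n_users / n_users_of_item) / math.log2(n_users)
    >>> rec_weight(1), rec_weight(4)
    (1.0, 0.0)
    >>> round((rec_weight(1) + rec_weight(2)) / 2, 2)  # Surprisal@2 for items seen by 1 and 2 users
    0.75

An item seen by a single user gets the maximum weight of 1, an item seen by everyone gets 0, and a list's surprisal is the mean weight of its top-k items, which is what _get_metric_value_by_user computes.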

replay/experimental/metrics/unexpectedness.py
@@ -1,76 +0,0 @@
-from typing import Optional
-
-from replay.utils import DataFrameLike, SparkDataFrame
-from replay.utils.spark_utils import convert2spark, get_top_k_recs
-
-from .base_metric import RecOnlyMetric, fill_na_with_empty_array, filter_sort
-
-
-class Unexpectedness(RecOnlyMetric):
-    """
-    Fraction of recommended items that are not present in some baseline recommendations.
-
-    >>> import pandas as pd
-    >>> from replay.utils.session_handler import get_spark_session, State
-    >>> spark = get_spark_session(1, 1)
-    >>> state = State(spark)
-
-    >>> log = pd.DataFrame({
-    ...     "user_idx": [1, 1, 1],
-    ...     "item_idx": [1, 2, 3],
-    ...     "relevance": [5, 5, 5],
-    ...     "timestamp": [1, 1, 1],
-    ... })
-    >>> recs = pd.DataFrame({
-    ...     "user_idx": [1, 1, 1],
-    ...     "item_idx": [0, 0, 1],
-    ...     "relevance": [5, 5, 5],
-    ...     "timestamp": [1, 1, 1],
-    ... })
-    >>> metric = Unexpectedness(log)
-    >>> round(metric(recs, 3), 2)
-    0.67
-    """
-
-    _scala_udf_name = "getUnexpectednessMetricValue"
-
-    def __init__(self, pred: DataFrameLike, use_scala_udf: bool = False):
-        """
-        :param pred: model predictions
-        """
-        self._use_scala_udf = use_scala_udf
-        self.pred = convert2spark(pred)
-
-    @staticmethod
-    def _get_metric_value_by_user(k, *args) -> float:
-        pred = args[0]
-        base_pred = args[1]
-        if len(pred) == 0:
-            return 0
-        return 1.0 - len(set(pred[:k]) & set(base_pred[:k])) / k
-
-    def _get_enriched_recommendations(
-        self,
-        recommendations: SparkDataFrame,
-        ground_truth: SparkDataFrame,  # noqa: ARG002
-        max_k: int,
-        ground_truth_users: Optional[DataFrameLike] = None,
-    ) -> SparkDataFrame:
-        recommendations = convert2spark(recommendations)
-        ground_truth_users = convert2spark(ground_truth_users)
-        base_pred = self.pred
-
-        # TO DO: preprocess base_recs once in __init__
-
-        base_recs = filter_sort(base_pred).withColumnRenamed("pred", "base_pred")
-
-        # if there are duplicates in recommendations,
-        # we will leave fewer than k recommendations after sort_udf
-        recommendations = get_top_k_recs(recommendations, k=max_k)
-        recommendations = filter_sort(recommendations)
-        recommendations = recommendations.join(base_recs, how="right", on=["user_idx"])
-
-        if ground_truth_users is not None:
-            recommendations = recommendations.join(ground_truth_users, on="user_idx", how="right")
-
-        return fill_na_with_empty_array(recommendations, "pred", base_pred.schema["item_idx"].dataType)

replay/experimental/models/__init__.py
@@ -1,50 +0,0 @@
-from typing import Any
-
-from replay.experimental.models.admm_slim import ADMMSLIM
-from replay.experimental.models.base_torch_rec import TorchRecommender
-from replay.experimental.models.cql import CQL
-from replay.experimental.models.ddpg import DDPG
-from replay.experimental.models.dt4rec.dt4rec import DT4Rec
-from replay.experimental.models.hierarchical_recommender import HierarchicalRecommender
-from replay.experimental.models.implicit_wrap import ImplicitWrap
-from replay.experimental.models.mult_vae import MultVAE
-from replay.experimental.models.neural_ts import NeuralTS
-from replay.experimental.models.neuromf import NeuroMF
-from replay.experimental.models.scala_als import ScalaALSWrap
-from replay.experimental.models.u_lin_ucb import ULinUCB
-
-__all__ = [
-    "ADMMSLIM",
-    "CQL",
-    "DDPG",
-    "DT4Rec",
-    "HierarchicalRecommender",
-    "ImplicitWrap",
-    "MultVAE",
-    "NeuralTS",
-    "NeuroMF",
-    "ScalaALSWrap",
-    "TorchRecommender",
-    "ULinUCB",
-]
-
-CONDITIONAL_IMPORTS = {"LightFMWrap": "replay.experimental.models.lightfm_wrap"}
-
-
-class ConditionalAccessError(Exception):
-    """Raised when trying to access conditional elements from parent module instead of a direct import."""
-
-
-def __getattr__(name: str) -> Any:
-    if name in CONDITIONAL_IMPORTS:
-        msg = (
-            f"{name} relies on manual dependency installation and cannot be accessed via higher-level modules. "
-            f"If you wish to use this attribute, import it directly from {CONDITIONAL_IMPORTS[name]}"
-        )
-
-        raise ConditionalAccessError(msg)
-
-    if name in __all__:
-        return globals()[name]
-    msg = f"module {__name__!r} has no attribute {name!r}"
-    raise AttributeError(msg)
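
The removed models/__init__.py relies on a module-level __getattr__ (PEP 562): names backed by optional dependencies (here LightFMWrap) are never imported eagerly, and reaching them through the package raises ConditionalAccessError with a pointer to the direct import path. A short illustration, assuming an older environment that still ships the experimental models and their dependencies:

    # Hypothetical session against a pre-0.20.2 wheel, for illustration only.
    import replay.experimental.models as models

    models.DDPG  # imported eagerly at the top of __init__.py, so found without calling __getattr__

    try:
        models.LightFMWrap  # not imported eagerly; __getattr__ intercepts the lookup
    except Exception as err:  # ConditionalAccessError in the removed module
        print(err)  # message points to replay.experimental.models.lightfm_wrap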