replay-rec 0.20.2__py3-none-any.whl → 0.20.3rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. replay/__init__.py +1 -1
  2. replay/data/nn/sequential_dataset.py +8 -2
  3. replay/experimental/__init__.py +0 -0
  4. replay/experimental/metrics/__init__.py +62 -0
  5. replay/experimental/metrics/base_metric.py +603 -0
  6. replay/experimental/metrics/coverage.py +97 -0
  7. replay/experimental/metrics/experiment.py +175 -0
  8. replay/experimental/metrics/hitrate.py +26 -0
  9. replay/experimental/metrics/map.py +30 -0
  10. replay/experimental/metrics/mrr.py +18 -0
  11. replay/experimental/metrics/ncis_precision.py +31 -0
  12. replay/experimental/metrics/ndcg.py +49 -0
  13. replay/experimental/metrics/precision.py +22 -0
  14. replay/experimental/metrics/recall.py +25 -0
  15. replay/experimental/metrics/rocauc.py +49 -0
  16. replay/experimental/metrics/surprisal.py +90 -0
  17. replay/experimental/metrics/unexpectedness.py +76 -0
  18. replay/experimental/models/__init__.py +50 -0
  19. replay/experimental/models/admm_slim.py +257 -0
  20. replay/experimental/models/base_neighbour_rec.py +200 -0
  21. replay/experimental/models/base_rec.py +1386 -0
  22. replay/experimental/models/base_torch_rec.py +234 -0
  23. replay/experimental/models/cql.py +454 -0
  24. replay/experimental/models/ddpg.py +932 -0
  25. replay/experimental/models/dt4rec/__init__.py +0 -0
  26. replay/experimental/models/dt4rec/dt4rec.py +189 -0
  27. replay/experimental/models/dt4rec/gpt1.py +401 -0
  28. replay/experimental/models/dt4rec/trainer.py +127 -0
  29. replay/experimental/models/dt4rec/utils.py +264 -0
  30. replay/experimental/models/extensions/spark_custom_models/__init__.py +0 -0
  31. replay/experimental/models/extensions/spark_custom_models/als_extension.py +792 -0
  32. replay/experimental/models/hierarchical_recommender.py +331 -0
  33. replay/experimental/models/implicit_wrap.py +131 -0
  34. replay/experimental/models/lightfm_wrap.py +303 -0
  35. replay/experimental/models/mult_vae.py +332 -0
  36. replay/experimental/models/neural_ts.py +986 -0
  37. replay/experimental/models/neuromf.py +406 -0
  38. replay/experimental/models/scala_als.py +293 -0
  39. replay/experimental/models/u_lin_ucb.py +115 -0
  40. replay/experimental/nn/data/__init__.py +1 -0
  41. replay/experimental/nn/data/schema_builder.py +102 -0
  42. replay/experimental/preprocessing/__init__.py +3 -0
  43. replay/experimental/preprocessing/data_preparator.py +839 -0
  44. replay/experimental/preprocessing/padder.py +229 -0
  45. replay/experimental/preprocessing/sequence_generator.py +208 -0
  46. replay/experimental/scenarios/__init__.py +1 -0
  47. replay/experimental/scenarios/obp_wrapper/__init__.py +8 -0
  48. replay/experimental/scenarios/obp_wrapper/obp_optuna_objective.py +74 -0
  49. replay/experimental/scenarios/obp_wrapper/replay_offline.py +261 -0
  50. replay/experimental/scenarios/obp_wrapper/utils.py +85 -0
  51. replay/experimental/scenarios/two_stages/__init__.py +0 -0
  52. replay/experimental/scenarios/two_stages/reranker.py +117 -0
  53. replay/experimental/scenarios/two_stages/two_stages_scenario.py +757 -0
  54. replay/experimental/utils/__init__.py +0 -0
  55. replay/experimental/utils/logger.py +24 -0
  56. replay/experimental/utils/model_handler.py +186 -0
  57. replay/experimental/utils/session_handler.py +44 -0
  58. {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/METADATA +11 -17
  59. {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/RECORD +62 -7
  60. {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/WHEEL +0 -0
  61. {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/licenses/LICENSE +0 -0
  62. {replay_rec-0.20.2.dist-info → replay_rec-0.20.3rc0.dist-info}/licenses/NOTICE +0 -0
replay/experimental/metrics/coverage.py
@@ -0,0 +1,97 @@
+from typing import Optional, Union
+
+from replay.utils import PYSPARK_AVAILABLE, DataFrameLike, IntOrList, NumType, SparkDataFrame
+from replay.utils.spark_utils import convert2spark
+
+from .base_metric import RecOnlyMetric, process_k
+
+if PYSPARK_AVAILABLE:
+    from pyspark.sql import (
+        Window,
+        functions as sf,
+    )
+
+
+class Coverage(RecOnlyMetric):
+    """
+    Metric calculation is as follows:
+
+    * take ``K`` recommendations with the biggest ``relevance`` for each ``user_id``
+    * count the number of distinct ``item_id`` in these recommendations
+    * divide it by the number of items in the whole data set
+
+    """
+
+    def __init__(self, log: DataFrameLike):
+        """
+        :param log: pandas or Spark DataFrame
+            It is important for ``log`` to contain all available items.
+        """
+        self.items = convert2spark(log).select("item_idx").distinct()
+        self.item_count = self.items.count()
+
+    @staticmethod
+    def _get_metric_value_by_user(k, *args):
+        # not averaged by users
+        pass
+
+    def _get_enriched_recommendations(
+        self,
+        recommendations: DataFrameLike,
+        ground_truth: DataFrameLike,  # noqa: ARG002
+        max_k: int,  # noqa: ARG002
+        ground_truth_users: Optional[DataFrameLike] = None,
+    ) -> SparkDataFrame:
+        recommendations = convert2spark(recommendations)
+        if ground_truth_users is not None:
+            ground_truth_users = convert2spark(ground_truth_users)
+            return recommendations.join(ground_truth_users, on="user_idx", how="inner")
+        return recommendations
+
+    def _conf_interval(
+        self,
+        recs: DataFrameLike,  # noqa: ARG002
+        k_list: IntOrList,
+        alpha: float = 0.95,  # noqa: ARG002
+    ) -> Union[dict[int, float], float]:
+        if isinstance(k_list, int):
+            return 0.0
+        return dict.fromkeys(k_list, 0.0)
+
+    def _median(
+        self,
+        recs: DataFrameLike,
+        k_list: IntOrList,
+    ) -> Union[dict[int, NumType], NumType]:
+        return self._mean(recs, k_list)
+
+    @process_k
+    def _mean(
+        self,
+        recs: SparkDataFrame,
+        k_list: list,
+    ) -> Union[dict[int, NumType], NumType]:
+        unknown_item_count = recs.select("item_idx").distinct().exceptAll(self.items).count()
+        if unknown_item_count > 0:
+            self.logger.warning(
+                "Recommendations contain items that were not present in the log. "
+                r"The resulting metric value can be more than 1.0 ¯\_(ツ)_/¯"
+            )
+
+        best_positions = (
+            recs.withColumn(
+                "row_num",
+                sf.row_number().over(Window.partitionBy("user_idx").orderBy(sf.desc("relevance"))),
+            )
+            .select("item_idx", "row_num")
+            .groupBy("item_idx")
+            .agg(sf.min("row_num").alias("best_position"))
+            .cache()
+        )
+
+        res = {}
+        for current_k in k_list:
+            res[current_k] = best_positions.filter(sf.col("best_position") <= current_k).count() / self.item_count
+
+        best_positions.unpersist()
+        return res
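A minimal pure-Python sketch of the counting performed by ``_mean`` (illustrative only, not the Spark implementation; the toy data mirrors the ``Experiment`` docstring below, and the variable names are hypothetical):

    log_items = {1, 2, 3}                      # all items present in the log
    top_k_recs = {1: [1, 4, 5]}                # top-3 recommendations per user
    covered = {item for recs in top_k_recs.values() for item in recs[:3]}
    print(len(covered) / len(log_items))       # 1.0; items 4 and 5 are unseen, hence the warning above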
replay/experimental/metrics/experiment.py
@@ -0,0 +1,175 @@
+from typing import Any, Optional
+
+from replay.utils import DataFrameLike, IntOrList, NumType, PandasDataFrame
+from replay.utils.spark_utils import convert2spark
+
+from .base_metric import Metric, NCISMetric, RecOnlyMetric, get_enriched_recommendations
+
+
+class Experiment:
+    """
+    This class calculates and stores metric values.
+    Initialize it with test data and a dictionary mapping metrics to their depth cut-offs.
+
+    Results are available via the ``pandas_df`` attribute.
+
+    Example:
+
+    >>> import pandas as pd
+    >>> from replay.experimental.metrics import Coverage, NDCG, Precision, Surprisal
+    >>> log = pd.DataFrame({"user_idx": [2, 2, 2, 1], "item_idx": [1, 2, 3, 3], "relevance": [5, 5, 5, 5]})
+    >>> test = pd.DataFrame({"user_idx": [1, 1, 1], "item_idx": [1, 2, 3], "relevance": [5, 3, 4]})
+    >>> pred = pd.DataFrame({"user_idx": [1, 1, 1], "item_idx": [4, 1, 3], "relevance": [5, 4, 5]})
+    >>> recs = pd.DataFrame({"user_idx": [1, 1, 1], "item_idx": [1, 4, 5], "relevance": [5, 4, 5]})
+    >>> ex = Experiment(test, {NDCG(): [2, 3], Surprisal(log): 3})
+    >>> ex.add_result("baseline", recs)
+    >>> ex.add_result("baseline_gt_users", recs, ground_truth_users=pd.DataFrame({"user_idx": [1, 3]}))
+    >>> ex.add_result("model", pred)
+    >>> ex.results
+                         NDCG@2    NDCG@3  Surprisal@3
+    baseline           0.386853  0.296082     1.000000
+    baseline_gt_users  0.193426  0.148041     0.500000
+    model              0.386853  0.530721     0.666667
+    >>> ex.compare("baseline")
+                       NDCG@2  NDCG@3 Surprisal@3
+    baseline                –       –           –
+    baseline_gt_users  -50.0%  -50.0%      -50.0%
+    model                0.0%  79.25%     -33.33%
+    >>> ex = Experiment(test, {Precision(): [3]}, calc_median=True, calc_conf_interval=0.95)
+    >>> ex.add_result("baseline", recs)
+    >>> ex.add_result("model", pred)
+    >>> ex.results
+              Precision@3  Precision@3_median  Precision@3_0.95_conf_interval
+    baseline     0.333333            0.333333                             0.0
+    model        0.666667            0.666667                             0.0
+    >>> ex = Experiment(test, {Coverage(log): 3}, calc_median=True, calc_conf_interval=0.95)
+    >>> ex.add_result("baseline", recs)
+    >>> ex.add_result("model", pred)
+    >>> ex.results
+              Coverage@3  Coverage@3_median  Coverage@3_0.95_conf_interval
+    baseline         1.0                1.0                            0.0
+    model            1.0                1.0                            0.0
+    """
+
+    def __init__(
+        self,
+        test: Any,
+        metrics: dict[Metric, IntOrList],
+        calc_median: bool = False,
+        calc_conf_interval: Optional[float] = None,
+    ):
+        """
+        :param test: test DataFrame
+        :param metrics: Dictionary of metrics to calculate.
+            Key -- metric, value -- ``int`` or a list of ints.
+        :param calc_median: flag to calculate median value across users
+        :param calc_conf_interval: quantile value for the calculation of the confidence interval.
+            Resulting value is the half of the confidence interval.
+        """
+        self.test = convert2spark(test)
+        self.results = PandasDataFrame()
+        self.metrics = metrics
+        self.calc_median = calc_median
+        self.calc_conf_interval = calc_conf_interval
+
+    def add_result(
+        self,
+        name: str,
+        pred: DataFrameLike,
+        ground_truth_users: Optional[DataFrameLike] = None,
+    ) -> None:
+        """
+        Calculate metrics for predictions
+
+        :param name: name of the run to store in the resulting DataFrame
+        :param pred: model recommendations
+        :param ground_truth_users: list of users to consider in metric calculation.
+            If None, only the users from ground_truth are considered.
+        """
+        max_k = 0
+        for current_k in self.metrics.values():
+            max_k = max((*current_k, max_k) if isinstance(current_k, list) else (current_k, max_k))
+
+        recs = get_enriched_recommendations(pred, self.test, max_k, ground_truth_users).cache()
+        for metric, k_list in sorted(self.metrics.items(), key=lambda x: str(x[0])):
+            enriched = None
+            if isinstance(metric, (RecOnlyMetric, NCISMetric)):
+                enriched = metric._get_enriched_recommendations(pred, self.test, max_k, ground_truth_users)
+            values, median, conf_interval = self._calculate(metric, enriched or recs, k_list)
+
+            if isinstance(k_list, int):
+                self._add_metric(
+                    name,
+                    metric,
+                    k_list,
+                    values,
+                    median,
+                    conf_interval,
+                )
+            else:
+                for k, val in sorted(values.items(), key=lambda x: x[0]):
+                    self._add_metric(
+                        name,
+                        metric,
+                        k,
+                        val,
+                        None if median is None else median[k],
+                        None if conf_interval is None else conf_interval[k],
+                    )
+        recs.unpersist()
+
+    def _calculate(self, metric, enriched, k_list):
+        median = None
+        conf_interval = None
+        values = metric._mean(enriched, k_list)
+        if self.calc_median:
+            median = metric._median(enriched, k_list)
+        if self.calc_conf_interval is not None:
+            conf_interval = metric._conf_interval(enriched, k_list, self.calc_conf_interval)
+        return values, median, conf_interval
+
+    def _add_metric(
+        self,
+        name: str,
+        metric: Metric,
+        k: int,
+        value: NumType,
+        median: Optional[NumType],
+        conf_interval: Optional[NumType],
+    ):
+        """
+        Add metric for a specific k
+
+        :param name: name to save results
+        :param metric: metric object
+        :param k: length of the recommendation list
+        :param value: metric value
+        :param median: median value
+        :param conf_interval: confidence interval value
+        """
+        self.results.at[name, f"{metric}@{k}"] = value
+        if median is not None:
+            self.results.at[name, f"{metric}@{k}_median"] = median
+        if conf_interval is not None:
+            self.results.at[name, f"{metric}@{k}_{self.calc_conf_interval}_conf_interval"] = conf_interval
+
+    def compare(self, name: str) -> PandasDataFrame:
+        """
+        Show results as a percentage difference to record ``name``.
+
+        :param name: name of the baseline record
+        :return: results table in a percentage format
+        """
+        if name not in self.results.index:
+            msg = f"No results for model {name}"
+            raise ValueError(msg)
+        columns = [column for column in self.results.columns if column[-1].isdigit()]
+        data_frame = self.results[columns].copy()
+        baseline = data_frame.loc[name]
+        for idx in data_frame.index:
+            if idx != name:
+                diff = data_frame.loc[idx] / baseline - 1
+                data_frame.loc[idx] = [str(round(v * 100, 2)) + "%" for v in diff]
+            else:
+                data_frame.loc[name] = ["–"] * len(baseline)
+        return data_frame
replay/experimental/metrics/hitrate.py
@@ -0,0 +1,26 @@
+from .base_metric import Metric
+
+
+class HitRate(Metric):
+    """
+    Percentage of users that have at least one
+    correctly recommended item among top-k.
+
+    .. math::
+        HitRate@K(i) = \\max_{j \\in [1..K]}\\mathbb{1}_{r_{ij}}
+
+    .. math::
+        HitRate@K = \\frac {\\sum_{i=1}^{N}HitRate@K(i)}{N}
+
+    :math:`\\mathbb{1}_{r_{ij}}` -- indicator function stating that user :math:`i` interacted with item :math:`j`
+
+    """
+
+    _scala_udf_name = "getHitRateMetricValue"
+
+    @staticmethod
+    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
+        for i in pred[:k]:
+            if i in ground_truth:
+                return 1
+        return 0
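A quick worked example of the per-user rule above (an illustrative sketch, assuming the replay-rec experimental extras are importable; it calls the private static helper directly, which the base class normally does):

    from replay.experimental.metrics.hitrate import HitRate

    # top-3 recommendations are [4, 1, 3]; the test interactions are [1, 2, 3]
    print(HitRate._get_metric_value_by_user(1, [4, 1, 3], [1, 2, 3]))  # 0: item 4 misses
    print(HitRate._get_metric_value_by_user(3, [4, 1, 3], [1, 2, 3]))  # 1: item 1 hits at position 2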
replay/experimental/metrics/map.py
@@ -0,0 +1,30 @@
+from .base_metric import Metric
+
+
+class MAP(Metric):
+    """
+    Mean Average Precision -- average the ``Precision`` at relevant positions for each user,
+    and then calculate the mean across all users.
+
+    .. math::
+        &AP@K(i) = \\frac 1K \\sum_{j=1}^{K}\\mathbb{1}_{r_{ij}}Precision@j(i)
+
+        &MAP@K = \\frac {\\sum_{i=1}^{N}AP@K(i)}{N}
+
+    :math:`\\mathbb{1}_{r_{ij}}` -- indicator function showing if user :math:`i` interacted with item :math:`j`
+    """
+
+    _scala_udf_name = "getMAPMetricValue"
+
+    @staticmethod
+    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
+        length = min(k, len(pred))
+        if len(ground_truth) == 0 or len(pred) == 0:
+            return 0
+        tp_cum = 0
+        result = 0
+        for i in range(length):
+            if pred[i] in ground_truth:
+                tp_cum += 1
+                result += tp_cum / (i + 1)
+        return result / k
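A worked example of the helper above on plain lists (a sketch, assuming the experimental extras are importable):

    from replay.experimental.metrics.map import MAP

    # relevant hits at positions 2 and 3, so AP@3 = (1/2 + 2/3) / 3 ≈ 0.3889
    print(MAP._get_metric_value_by_user(3, [4, 1, 3], [1, 2, 3]))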
replay/experimental/metrics/mrr.py
@@ -0,0 +1,18 @@
+from .base_metric import Metric
+
+
+class MRR(Metric):
+    """
+    Mean Reciprocal Rank --
+    Reciprocal Rank is the inverse position of the first relevant item among top-k recommendations,
+    :math:`\\frac {1}{rank_i}`. This value is averaged over all users.
+    """
+
+    _scala_udf_name = "getMRRMetricValue"
+
+    @staticmethod
+    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
+        for i in range(min(k, len(pred))):
+            if pred[i] in ground_truth:
+                return 1 / (1 + i)
+        return 0
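A worked example of the reciprocal-rank rule above (a sketch, calling the private helper directly):

    from replay.experimental.metrics.mrr import MRR

    # the first relevant item appears at position 2, so the reciprocal rank is 1/2
    print(MRR._get_metric_value_by_user(3, [4, 1, 3], [1, 2, 3]))  # 0.5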
replay/experimental/metrics/ncis_precision.py
@@ -0,0 +1,31 @@
+import numpy as np
+
+from .base_metric import NCISMetric
+
+
+class NCISPrecision(NCISMetric):
+    """
+    Share of relevant items among top ``K`` recommendations with NCIS weighting.
+
+    .. math::
+        Precision@K(i) = \\frac {\\sum_{j=1}^{K}\\mathbb{1}_{r_{ij}} w_{ij}}{\\sum_{j=1}^{K} w_{ij}}
+
+    .. math::
+        Precision@K = \\frac {\\sum_{i=1}^{N}Precision@K(i)}{N}
+
+    :math:`\\mathbb{1}_{r_{ij}}` -- indicator function
+    showing that user :math:`i` interacted with item :math:`j`
+    :math:`w_{ij}` -- NCIS weight, calculated as the ratio of the current policy score to the previous
+    policy score, with clipping and an optional activation over the policy scores (relevance).
+    Source: arxiv.org/abs/1801.07030
+    """
+
+    _scala_udf_name = "getNCISPrecisionMetricValue"
+
+    @staticmethod
+    def _get_metric_value_by_user(k, *args):
+        pred, ground_truth, pred_weights = args
+        if len(pred) == 0 or len(ground_truth) == 0:
+            return 0
+        mask = np.isin(pred[:k], ground_truth)
+        return sum(np.array(pred_weights)[mask]) / sum(pred_weights[:k])
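A worked example of the weighted rule above (a sketch; the NCIS weights here are hypothetical and must have one entry per recommended item):

    from replay.experimental.metrics.ncis_precision import NCISPrecision

    # items 1 and 3 are relevant, so the value is (1.0 + 0.8) / (0.5 + 1.0 + 0.8) ≈ 0.7826
    print(NCISPrecision._get_metric_value_by_user(3, [4, 1, 3], [1, 2, 3], [0.5, 1.0, 0.8]))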
replay/experimental/metrics/ndcg.py
@@ -0,0 +1,49 @@
+import math
+
+from .base_metric import Metric
+
+
+class NDCG(Metric):
+    """
+    Normalized Discounted Cumulative Gain is a metric
+    that takes into account positions of relevant items.
+
+    This is the binary version: it only takes into account
+    whether the item was consumed or not; the relevance value is ignored.
+
+    .. math::
+        DCG@K(i) = \\sum_{j=1}^{K}\\frac{\\mathbb{1}_{r_{ij}}}{\\log_2 (j+1)}
+
+
+    :math:`\\mathbb{1}_{r_{ij}}` -- indicator function showing that user :math:`i` interacted with item :math:`j`
+
+    To get from :math:`DCG` to :math:`nDCG` we calculate the biggest possible value of :math:`DCG`
+    for user :math:`i` and recommendation length :math:`K`.
+
+    .. math::
+        IDCG@K(i) = \\max(DCG@K(i)) = \\sum_{j=1}^{K}\\frac{\\mathbb{1}_{j\\le|Rel_i|}}{\\log_2 (j+1)}
+
+    .. math::
+        nDCG@K(i) = \\frac {DCG@K(i)}{IDCG@K(i)}
+
+    :math:`|Rel_i|` -- number of relevant items for user :math:`i`
+
+    Metric is averaged over users.
+
+    .. math::
+        nDCG@K = \\frac {\\sum_{i=1}^{N}nDCG@K(i)}{N}
+    """
+
+    _scala_udf_name = "getNDCGMetricValue"
+
+    @staticmethod
+    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
+        if len(pred) == 0 or len(ground_truth) == 0:
+            return 0.0
+        pred_len = min(k, len(pred))
+        ground_truth_len = min(k, len(ground_truth))
+        denom = [1 / math.log2(i + 2) for i in range(k)]
+        dcg = sum(denom[i] for i in range(pred_len) if pred[i] in ground_truth)
+        idcg = sum(denom[:ground_truth_len])
+
+        return dcg / idcg
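A worked example of the helper above (a sketch; the value matches the ``model`` row of the ``Experiment`` docstring):

    from replay.experimental.metrics.ndcg import NDCG

    # hits at positions 2 and 3: DCG = 1/log2(3) + 1/log2(4) ≈ 1.131, IDCG = 1 + 1/log2(3) + 1/log2(4) ≈ 2.131
    print(NDCG._get_metric_value_by_user(3, [4, 1, 3], [1, 2, 3]))  # ≈ 0.5307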
replay/experimental/metrics/precision.py
@@ -0,0 +1,22 @@
+from .base_metric import Metric
+
+
+class Precision(Metric):
+    """
+    Mean percentage of relevant items among top ``K`` recommendations.
+
+    .. math::
+        Precision@K(i) = \\frac {\\sum_{j=1}^{K}\\mathbb{1}_{r_{ij}}}{K}
+
+    .. math::
+        Precision@K = \\frac {\\sum_{i=1}^{N}Precision@K(i)}{N}
+
+    :math:`\\mathbb{1}_{r_{ij}}` -- indicator function showing that user :math:`i` interacted with item :math:`j`"""
+
+    _scala_udf_name = "getPrecisionMetricValue"
+
+    @staticmethod
+    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
+        if len(pred) == 0:
+            return 0
+        return len(set(pred[:k]) & set(ground_truth)) / k
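A worked example of the rule above (a sketch, calling the private helper directly):

    from replay.experimental.metrics.precision import Precision

    # 2 of the top-3 recommended items are relevant
    print(Precision._get_metric_value_by_user(3, [4, 1, 3], [1, 2, 3]))  # ≈ 0.6667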
replay/experimental/metrics/recall.py
@@ -0,0 +1,25 @@
+from .base_metric import Metric
+
+
+class Recall(Metric):
+    """
+    Mean percentage of relevant items that were shown among top ``K`` recommendations.
+
+    .. math::
+        Recall@K(i) = \\frac {\\sum_{j=1}^{K}\\mathbb{1}_{r_{ij}}}{|Rel_i|}
+
+    .. math::
+        Recall@K = \\frac {\\sum_{i=1}^{N}Recall@K(i)}{N}
+
+    :math:`\\mathbb{1}_{r_{ij}}` -- indicator function showing that user :math:`i` interacted with item :math:`j`
+
+    :math:`|Rel_i|` -- the number of relevant items for user :math:`i`
+    """
+
+    _scala_udf_name = "getRecallMetricValue"
+
+    @staticmethod
+    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
+        if len(ground_truth) == 0:
+            return 0.0
+        return len(set(pred[:k]) & set(ground_truth)) / len(ground_truth)
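A worked example of the rule above (a sketch, calling the private helper directly):

    from replay.experimental.metrics.recall import Recall

    # 2 of the 3 relevant items are recovered in the top-3
    print(Recall._get_metric_value_by_user(3, [4, 1, 3], [1, 2, 3]))  # ≈ 0.6667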
replay/experimental/metrics/rocauc.py
@@ -0,0 +1,49 @@
+from .base_metric import Metric
+
+
+class RocAuc(Metric):
+    """
+    Receiver Operating Characteristic / Area Under the Curve is an aggregated performance measure
+    that depends only on the order of recommended items.
+    It can be interpreted as the fraction of object pairs (object of class 1, object of class 0)
+    that were correctly ordered by the model.
+    The bigger the value of AUC, the better the classification model.
+
+    .. math::
+        ROCAUC@K(i) = \\frac {\\sum_{s=1}^{K}\\sum_{t=1}^{K}
+        \\mathbb{1}_{r_{si}<r_{ti}}
+        \\mathbb{1}_{gt_{si}<gt_{ti}}}
+        {\\sum_{s=1}^{K}\\sum_{t=1}^{K} \\mathbb{1}_{gt_{si}<gt_{ti}}}
+
+    :math:`\\mathbb{1}_{r_{si}<r_{ti}}` -- indicator function showing that the recommendation score for
+    user :math:`i` for item :math:`s` is bigger than for item :math:`t`
+
+    :math:`\\mathbb{1}_{gt_{si}<gt_{ti}}` -- indicator function showing that
+    user :math:`i` values item :math:`s` more than item :math:`t`.
+
+    Metric is averaged over all users.
+
+    .. math::
+        ROCAUC@K = \\frac {\\sum_{i=1}^{N}ROCAUC@K(i)}{N}
+    """
+
+    _scala_udf_name = "getRocAucMetricValue"
+
+    @staticmethod
+    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
+        length = min(k, len(pred))
+        if len(ground_truth) == 0 or len(pred) == 0:
+            return 0
+
+        fp_cur = 0
+        fp_cum = 0
+        for item in pred[:length]:
+            if item in ground_truth:
+                fp_cum += fp_cur
+            else:
+                fp_cur += 1
+        if fp_cur == length:
+            return 0
+        if fp_cum == 0:
+            return 1
+        return 1 - fp_cum / (fp_cur * (length - fp_cur))
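A worked example of the pair-counting rule above (a sketch, calling the private helper directly):

    from replay.experimental.metrics.rocauc import RocAuc

    # the single non-relevant item (4) is ranked above both relevant items,
    # so every (relevant, non-relevant) pair is mis-ordered and the AUC is 0
    print(RocAuc._get_metric_value_by_user(3, [4, 1, 3], [1, 2, 3]))  # 0.0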
replay/experimental/metrics/surprisal.py
@@ -0,0 +1,90 @@
+from typing import Optional
+
+import numpy as np
+
+from replay.utils import PYSPARK_AVAILABLE, DataFrameLike, SparkDataFrame
+from replay.utils.spark_utils import convert2spark, get_top_k_recs
+
+from .base_metric import RecOnlyMetric, fill_na_with_empty_array, filter_sort
+
+if PYSPARK_AVAILABLE:
+    from pyspark.sql import (
+        functions as sf,
+        types as st,
+    )
+
+
+class Surprisal(RecOnlyMetric):
+    """
+    Measures how many surprising rare items are present in recommendations.
+
+    .. math::
+        \\textit{Self-Information}(j) = -\\log_2 \\frac {u_j}{N}
+
+    :math:`u_j` -- number of users that interacted with item :math:`j`.
+    Cold items are treated as if they were rated by 1 user.
+    That is, if they appear in recommendations, it is completely unexpected.
+
+    Metric is normalized.
+
+    Surprisal for item :math:`j` is
+
+    .. math::
+        Surprisal(j) = \\frac {\\textit{Self-Information}(j)}{\\log_2 N}
+
+    Recommendation list surprisal is the average surprisal of items in it.
+
+    .. math::
+        Surprisal@K(i) = \\frac {\\sum_{j=1}^{K}Surprisal(j)} {K}
+
+    Final metric is averaged over users.
+
+    .. math::
+        Surprisal@K = \\frac {\\sum_{i=1}^{N}Surprisal@K(i)}{N}
+    """
+
+    _scala_udf_name = "getSurprisalMetricValue"
+
+    def __init__(self, log: DataFrameLike, use_scala_udf: bool = False):
+        """
+        Here we calculate self-information for each item
+
+        :param log: historical data
+        """
+        self._use_scala_udf = use_scala_udf
+        self.log = convert2spark(log)
+        n_users = self.log.select("user_idx").distinct().count()
+        self.item_weights = self.log.groupby("item_idx").agg(
+            (sf.log2(n_users / sf.countDistinct("user_idx")) / np.log2(n_users)).alias("rec_weight")
+        )
+
+    @staticmethod
+    def _get_metric_value_by_user(k, *args):
+        weights = args[0]
+        return sum(weights[:k]) / k
+
+    def _get_enriched_recommendations(
+        self,
+        recommendations: SparkDataFrame,
+        ground_truth: SparkDataFrame,  # noqa: ARG002
+        max_k: int,
+        ground_truth_users: Optional[DataFrameLike] = None,
+    ) -> SparkDataFrame:
+        recommendations = convert2spark(recommendations)
+        ground_truth_users = convert2spark(ground_truth_users)
+        recommendations = get_top_k_recs(recommendations, max_k)
+
+        recommendations = recommendations.join(self.item_weights, on="item_idx", how="left").fillna(1.0)
+        recommendations = filter_sort(recommendations, "rec_weight")
+        recommendations = recommendations.select("user_idx", sf.col("rec_weight")).withColumn(
+            "rec_weight",
+            sf.col("rec_weight").cast(st.ArrayType(st.DoubleType(), True)),
+        )
+        if ground_truth_users is not None:
+            recommendations = fill_na_with_empty_array(
+                recommendations.join(ground_truth_users, on="user_idx", how="right"),
+                "rec_weight",
+                self.item_weights.schema["rec_weight"].dataType,
+            )
+
+        return recommendations
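A pure-Python sketch of the item weighting above, using the toy log from the ``Experiment`` docstring (illustrative only, not the Spark implementation):

    import math

    # two users in the log; item 3 was seen by both, items 1 and 2 by one user each
    n_users = 2
    users_per_item = {1: 1, 2: 1, 3: 2}
    weight = {i: math.log2(n_users / u) / math.log2(n_users) for i, u in users_per_item.items()}
    rec_weights = [weight.get(i, 1.0) for i in [4, 1, 3]]   # cold item 4 falls back to weight 1.0
    print(sum(rec_weights[:3]) / 3)                         # ≈ 0.6667, cf. Surprisal@3 for "model" above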