replay-rec 0.18.1rc0__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- replay/__init__.py +1 -1
- replay/data/nn/schema.py +3 -1
- replay/metrics/surprisal.py +4 -2
- replay/models/lin_ucb.py +2 -3
- replay/models/nn/loss/__init__.py +1 -0
- replay/models/nn/loss/sce.py +131 -0
- replay/models/nn/sequential/bert4rec/lightning.py +36 -4
- replay/models/nn/sequential/bert4rec/model.py +5 -46
- replay/models/nn/sequential/sasrec/lightning.py +27 -3
- replay/models/nn/sequential/sasrec/model.py +1 -1
- replay/preprocessing/filters.py +102 -1
- replay/preprocessing/label_encoder.py +8 -4
- {replay_rec-0.18.1rc0.dist-info → replay_rec-0.19.0.dist-info}/METADATA +5 -12
- {replay_rec-0.18.1rc0.dist-info → replay_rec-0.19.0.dist-info}/RECORD +16 -70
- {replay_rec-0.18.1rc0.dist-info → replay_rec-0.19.0.dist-info}/WHEEL +1 -1
- replay/experimental/__init__.py +0 -0
- replay/experimental/metrics/__init__.py +0 -62
- replay/experimental/metrics/base_metric.py +0 -602
- replay/experimental/metrics/coverage.py +0 -97
- replay/experimental/metrics/experiment.py +0 -175
- replay/experimental/metrics/hitrate.py +0 -26
- replay/experimental/metrics/map.py +0 -30
- replay/experimental/metrics/mrr.py +0 -18
- replay/experimental/metrics/ncis_precision.py +0 -31
- replay/experimental/metrics/ndcg.py +0 -49
- replay/experimental/metrics/precision.py +0 -22
- replay/experimental/metrics/recall.py +0 -25
- replay/experimental/metrics/rocauc.py +0 -49
- replay/experimental/metrics/surprisal.py +0 -90
- replay/experimental/metrics/unexpectedness.py +0 -76
- replay/experimental/models/__init__.py +0 -13
- replay/experimental/models/admm_slim.py +0 -205
- replay/experimental/models/base_neighbour_rec.py +0 -204
- replay/experimental/models/base_rec.py +0 -1340
- replay/experimental/models/base_torch_rec.py +0 -234
- replay/experimental/models/cql.py +0 -454
- replay/experimental/models/ddpg.py +0 -923
- replay/experimental/models/dt4rec/__init__.py +0 -0
- replay/experimental/models/dt4rec/dt4rec.py +0 -189
- replay/experimental/models/dt4rec/gpt1.py +0 -401
- replay/experimental/models/dt4rec/trainer.py +0 -127
- replay/experimental/models/dt4rec/utils.py +0 -265
- replay/experimental/models/extensions/spark_custom_models/__init__.py +0 -0
- replay/experimental/models/extensions/spark_custom_models/als_extension.py +0 -792
- replay/experimental/models/hierarchical_recommender.py +0 -331
- replay/experimental/models/implicit_wrap.py +0 -131
- replay/experimental/models/lightfm_wrap.py +0 -302
- replay/experimental/models/mult_vae.py +0 -332
- replay/experimental/models/neural_ts.py +0 -986
- replay/experimental/models/neuromf.py +0 -406
- replay/experimental/models/scala_als.py +0 -296
- replay/experimental/models/u_lin_ucb.py +0 -115
- replay/experimental/nn/data/__init__.py +0 -1
- replay/experimental/nn/data/schema_builder.py +0 -102
- replay/experimental/preprocessing/__init__.py +0 -3
- replay/experimental/preprocessing/data_preparator.py +0 -839
- replay/experimental/preprocessing/padder.py +0 -229
- replay/experimental/preprocessing/sequence_generator.py +0 -208
- replay/experimental/scenarios/__init__.py +0 -1
- replay/experimental/scenarios/obp_wrapper/__init__.py +0 -8
- replay/experimental/scenarios/obp_wrapper/obp_optuna_objective.py +0 -74
- replay/experimental/scenarios/obp_wrapper/replay_offline.py +0 -261
- replay/experimental/scenarios/obp_wrapper/utils.py +0 -87
- replay/experimental/scenarios/two_stages/__init__.py +0 -0
- replay/experimental/scenarios/two_stages/reranker.py +0 -117
- replay/experimental/scenarios/two_stages/two_stages_scenario.py +0 -757
- replay/experimental/utils/__init__.py +0 -0
- replay/experimental/utils/logger.py +0 -24
- replay/experimental/utils/model_handler.py +0 -186
- replay/experimental/utils/session_handler.py +0 -44
- replay_rec-0.18.1rc0.dist-info/NOTICE +0 -41
- {replay_rec-0.18.1rc0.dist-info → replay_rec-0.19.0.dist-info}/LICENSE +0 -0
|
@@ -1,602 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Base classes for quality and diversity metrics.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import logging
|
|
6
|
-
from abc import ABC, abstractmethod
|
|
7
|
-
from typing import Dict, List, Optional, Union
|
|
8
|
-
|
|
9
|
-
from scipy.stats import norm
|
|
10
|
-
|
|
11
|
-
from replay.utils import PYSPARK_AVAILABLE, DataFrameLike, IntOrList, NumType, PandasDataFrame, SparkDataFrame
|
|
12
|
-
from replay.utils.session_handler import State
|
|
13
|
-
from replay.utils.spark_utils import convert2spark, get_top_k_recs
|
|
14
|
-
|
|
15
|
-
if PYSPARK_AVAILABLE:
|
|
16
|
-
from pyspark.sql import (
|
|
17
|
-
Column,
|
|
18
|
-
Window,
|
|
19
|
-
functions as sf,
|
|
20
|
-
types as st,
|
|
21
|
-
)
|
|
22
|
-
from pyspark.sql.column import _to_java_column, _to_seq
|
|
23
|
-
from pyspark.sql.types import DataType
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def fill_na_with_empty_array(df: SparkDataFrame, col_name: str, element_type: DataType) -> SparkDataFrame:
    """
    Replace missing values of an array column with an empty array.

    :param df: dataframe holding the `col_name` column of ArrayType(`element_type`)
    :param col_name: name of the array column whose missing values are replaced
    :param element_type: DataType of a single array element
    :return: `df` where every null in `col_name` became an empty array of `element_type`
    """
    # A bare ``sf.array()`` has no useful element type, hence the explicit cast to the column's type.
    empty_array = sf.array().cast(st.ArrayType(element_type))
    filled = sf.coalesce(col_name, empty_array)
    return df.withColumn(col_name, filled)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def preprocess_gt(
    ground_truth: DataFrameLike,
    ground_truth_users: Optional[DataFrameLike] = None,
) -> SparkDataFrame:
    """
    Collapse `ground_truth` into one row per user prior to metric calculation.

    :param ground_truth: dataframe with columns ``[user_idx, item_idx, relevance]``
    :param ground_truth_users: dataframe with column ``[user_idx]``; when given, the result
        contains exactly these users and users without ground truth items get an empty array
    :return: spark dataframe with columns ``[user_idx, ground_truth]``
    """
    gt_df = convert2spark(ground_truth)
    # NOTE(review): relies on convert2spark passing ``None`` through unchanged - confirm.
    users_df = convert2spark(ground_truth_users)

    items_per_user = gt_df.groupby("user_idx").agg(sf.collect_set("item_idx").alias("ground_truth"))
    if users_df is None:
        return items_per_user

    # The right join keeps every requested user; users absent from ground truth come out with null.
    restricted = items_per_user.join(users_df, on="user_idx", how="right")
    return fill_na_with_empty_array(
        restricted,
        "ground_truth",
        gt_df.schema["item_idx"].dataType,
    )
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def drop_duplicates(recommendations: DataFrameLike) -> SparkDataFrame:
    """
    Keep a single row per ``(user_idx, item_idx)`` pair: the one with the highest relevance.

    :param recommendations: dataframe with columns ``[user_idx, item_idx, relevance]``
    :return: `recommendations` without duplicated user-item pairs
    """
    best_first_within_pair = Window.partitionBy("user_idx", "item_idx").orderBy(sf.col("relevance").desc())
    numbered = recommendations.withColumn("_num", sf.row_number().over(best_first_within_pair))
    # Row number 1 is the most relevant occurrence of the pair; the helper column is dropped afterwards.
    return numbered.where(sf.col("_num") == 1).drop("_num")
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def filter_sort(recommendations: SparkDataFrame, extra_column: Optional[str] = None) -> SparkDataFrame:
    """
    Deduplicate predictions and collapse them into one relevance-ordered list per user.

    Duplicated user-item pairs are resolved in favour of the highest relevance, then the items
    of every user are collected into an array ordered from the most to the least relevant.

    :param recommendations: recommendation list with columns ``[user_idx, item_idx, relevance]``
        (plus `extra_column` when it is requested)
    :param extra_column: optional column of `recommendations` (e.g. item weight)
        returned as an array aligned with the predictions
    :return: ``[user_idx, pred]`` if `extra_column` is None
        or ``[user_idx, pred, extra_column]`` otherwise
    """
    schema = recommendations.schema
    item_type = schema["item_idx"].dataType
    extra_column_type = schema[extra_column].dataType if extra_column else None

    # ``relevance`` goes first in the struct so that sorting the structs orders them by relevance.
    struct_fields = ["relevance", "item_idx"]
    if extra_column is not None:
        struct_fields.append(extra_column)

    deduplicated = drop_duplicates(recommendations)
    collected = deduplicated.groupby("user_idx").agg(sf.collect_list(sf.struct(*struct_fields)).alias("pred_list"))
    # array_sort is ascending, reversing it puts the most relevant items first.
    ordered = collected.withColumn("pred_list", sf.reverse(sf.array_sort("pred_list")))

    selection = ["user_idx", sf.col("pred_list.item_idx").cast(st.ArrayType(item_type, True)).alias("pred")]
    if extra_column:
        extra_values = sf.col(f"pred_list.{extra_column}").cast(st.ArrayType(extra_column_type, True))
        selection.append(extra_values.alias(extra_column))

    return ordered.select(*selection)
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def get_enriched_recommendations(
    recommendations: DataFrameLike,
    ground_truth: DataFrameLike,
    max_k: int,
    ground_truth_users: Optional[DataFrameLike] = None,
) -> SparkDataFrame:
    """
    Build the per-user frame the metrics are computed on.

    Only the `max_k` most relevant recommendations of each user are kept, then recommendations
    and ground truth are aggregated into arrays and merged so that each user has a single record.

    :param recommendations: recommendation list
    :param ground_truth: test data
    :param max_k: maximal k value to calculate the metric for.
        `max_k` most relevant predictions are left for each user
    :param ground_truth_users: list of users to consider in metric calculation.
        if None, only the users from ground_truth are considered.
    :return: ``[user_idx, pred, ground_truth]``
    """
    # Truncation happens before deduplication, so duplicated recommendations
    # may leave a user with fewer than `max_k` predictions.
    top_recs = get_top_k_recs(convert2spark(recommendations), k=max_k)
    gt_by_user = preprocess_gt(ground_truth, ground_truth_users)

    # The right join keeps every ground truth user; those without recommendations get a null `pred`.
    merged = filter_sort(top_recs).join(gt_by_user, how="right", on=["user_idx"])
    item_type = top_recs.schema["item_idx"].dataType
    return fill_na_with_empty_array(merged, "pred", item_type)
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
def process_k(func):
    """
    Decorator that converts k to list and unpacks result.

    The wrapped method has to accept ``(self, recs, k_list, ...)`` and return a dict keyed by
    the cut-offs. When the caller passes a single integer `k`, the value stored under that key
    is returned; for any other `k` the whole dict is returned.

    :param func: method computing a ``{k: value}`` dict for a list of cut-offs
    :return: wrapped method accepting either a single cut-off or a list of cut-offs
    """
    from functools import wraps

    # ``wraps`` keeps the name and the docstring of the decorated method for introspection.
    @wraps(func)
    def wrap(self, recs: "SparkDataFrame", k: "IntOrList", *args, **kwargs):
        single_cut_off = isinstance(k, int)
        k_list = [k] if single_cut_off else k

        # Keyword arguments (e.g. ``alpha=...``) are forwarded as well as positional ones.
        res = func(self, recs, k_list, *args, **kwargs)

        return res[k] if single_cut_off else res

    return wrap
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
class Metric(ABC):
    """
    Base metric class.

    Subclasses implement ``_get_metric_value_by_user``. This class aggregates the per-user
    values into a mean (``__call__``), a median, a confidence interval half-width or a
    distribution over user activity.
    """

    # Lazily created library logger, see the ``logger`` property.
    _logger: Optional[logging.Logger] = None
    # Name of the Scala UDF computing the metric; falsy means there is no Scala implementation.
    _scala_udf_name: Optional[str] = None

    def __init__(self, use_scala_udf: bool = False) -> None:
        """
        :param use_scala_udf: compute per-user metric values with the Scala UDF
            instead of the Python ``_get_metric_value_by_user``
        """
        self._use_scala_udf = use_scala_udf

    @property
    def logger(self) -> logging.Logger:
        """
        :returns: get library logger
        """
        if self._logger is None:
            self._logger = logging.getLogger("replay")
        return self._logger

    @property
    def scala_udf_name(self) -> str:
        """
        Returns UDF name from `org.apache.spark.replay.utils.ScalaPySparkUDFs`

        :raises NotImplementedError: if the class does not define ``_scala_udf_name``
        """
        if self._scala_udf_name:
            return self._scala_udf_name
        msg = f"Scala UDF not implemented for {type(self).__name__} class!"
        raise NotImplementedError(msg)

    def __str__(self):
        return type(self).__name__

    def __call__(
        self,
        recommendations: DataFrameLike,
        ground_truth: DataFrameLike,
        k: IntOrList,
        ground_truth_users: Optional[DataFrameLike] = None,
    ) -> Union[Dict[int, NumType], NumType]:
        """
        :param recommendations: model predictions in a
            DataFrame ``[user_idx, item_idx, relevance]``
        :param ground_truth: test data
            ``[user_idx, item_idx, timestamp, relevance]``
        :param k: depth cut-off. Truncates recommendation lists to top-k items.
        :param ground_truth_users: list of users to consider in metric calculation.
            if None, only the users from ground_truth are considered.
        :return: metric value for an integer `k`, ``{k: metric value}`` for a list of cut-offs
        """
        recs = get_enriched_recommendations(
            recommendations,
            ground_truth,
            max_k=k if isinstance(k, int) else max(k),
            ground_truth_users=ground_truth_users,
        )
        return self._mean(recs, k)

    @process_k
    def _conf_interval(self, recs: SparkDataFrame, k_list: list, alpha: float):
        """
        Half-width of the normal-approximation confidence interval of the mean metric value.

        :param recs: enriched recommendations, one row per user
        :param k_list: cut-offs to calculate the interval for
        :param alpha: confidence level, e.g. 0.95
        :return: ``{k: quantile * std / sqrt(number of users)}``
        """
        res = {}
        quantile = norm.ppf((1 + alpha) / 2)
        for k in k_list:
            distribution = self._get_metric_distribution(recs, k)
            value = (
                distribution.agg(
                    sf.stddev("value").alias("std"),
                    sf.count("value").alias("count"),
                )
                .select(
                    # stddev is undefined for fewer than two values, treat that case as zero spread
                    sf.when(
                        sf.isnan(sf.col("std")) | sf.col("std").isNull(),
                        sf.lit(0.0),
                    )
                    .otherwise(sf.col("std"))
                    .cast("float")
                    .alias("std"),
                    "count",
                )
                .first()
            )
            users_count = value["count"]
            # An empty distribution has no spread; avoid dividing by sqrt(0).
            res[k] = quantile * value["std"] / (users_count**0.5) if users_count else 0.0
        return res

    @process_k
    def _median(self, recs: SparkDataFrame, k_list: list):
        """
        Approximate median (``percentile_approx``) of the per-user metric values.

        :param recs: enriched recommendations, one row per user
        :param k_list: cut-offs to calculate the median for
        :return: ``{k: median}``
        """
        res = {}
        for k in k_list:
            distribution = self._get_metric_distribution(recs, k)
            value = distribution.agg(sf.expr("percentile_approx(value, 0.5)").alias("value")).first()["value"]
            res[k] = value
        return res

    @process_k
    def _mean(self, recs: SparkDataFrame, k_list: list):
        """
        Mean of the per-user metric values.

        :param recs: enriched recommendations, one row per user
        :param k_list: cut-offs to calculate the mean for
        :return: ``{k: mean}``
        """
        res = {}
        for k in k_list:
            distribution = self._get_metric_distribution(recs, k)
            value = distribution.agg(sf.avg("value").alias("value")).first()["value"]
            res[k] = value
        return res

    def _get_metric_distribution(self, recs: SparkDataFrame, k: int) -> SparkDataFrame:
        """
        :param recs: recommendations
        :param k: depth cut-off
        :return: metric distribution for different cut-offs and users,
            dataframe with columns ``[user_idx, value]``
        """
        if self._use_scala_udf:
            # Every column after the first one is passed to the UDF in the order of `recs`.
            metric_value_col = self.get_scala_udf(self.scala_udf_name, [sf.lit(k).alias("k"), *recs.columns[1:]]).alias(
                "value"
            )
            return recs.select("user_idx", metric_value_col)

        # The lambda refers to the class, not to ``self``
        # (presumably to keep the metric instance out of the serialized closure - TODO confirm).
        cur_class = self.__class__
        distribution = recs.rdd.flatMap(lambda x: [(x[0], float(cur_class._get_metric_value_by_user(k, *x[1:])))]).toDF(
            f"user_idx {recs.schema['user_idx'].dataType.typeName()}, value double"
        )
        return distribution

    @staticmethod
    @abstractmethod
    def _get_metric_value_by_user(k, pred, ground_truth) -> float:
        """
        Metric calculation for one user.

        :param k: depth cut-off
        :param pred: recommendations
        :param ground_truth: test data
        :return: metric value for current user
        """

    def user_distribution(
        self,
        log: DataFrameLike,
        recommendations: DataFrameLike,
        ground_truth: DataFrameLike,
        k: IntOrList,
        ground_truth_users: Optional[DataFrameLike] = None,
    ) -> PandasDataFrame:
        """
        Get mean value of metric for all users with the same number of ratings.

        :param log: history DataFrame to calculate number of ratings per user
        :param recommendations: prediction DataFrame
        :param ground_truth: test data
        :param k: depth cut-off
        :param ground_truth_users: list of users to consider in metric calculation.
            if None, only the users from ground_truth are considered.
        :return: pandas DataFrame with columns ``[count, value]``; rows calculated for
            different cut-offs are stacked one after another in the order of `k`
        """
        # Local import: the module itself only imports the ``PandasDataFrame`` alias.
        import pandas as pd

        log = convert2spark(log)
        count = log.groupBy("user_idx").count()
        # Metrics with their own enrichment logic define ``_get_enriched_recommendations``.
        enrich = getattr(self, "_get_enriched_recommendations", get_enriched_recommendations)
        recs = enrich(
            recommendations,
            ground_truth,
            max_k=k if isinstance(k, int) else max(k),
            ground_truth_users=ground_truth_users,
        )
        k_list = [k] if isinstance(k, int) else k
        frames = []
        for cut_off in k_list:
            dist = self._get_metric_distribution(recs, cut_off)
            # Users missing from the log are counted as users with zero ratings.
            val = count.join(dist, on="user_idx", how="right").fillna(0, subset="count")
            val = (
                val.groupBy("count")
                .agg(sf.avg("value").alias("value"))
                .orderBy(["count"])
                .select("count", "value")
                .toPandas()
            )
            frames.append(val)
        if not frames:
            return PandasDataFrame()
        # ``DataFrame.append`` was removed in pandas 2.0, ``concat`` works for every pandas version.
        return pd.concat(frames, ignore_index=True)

    @staticmethod
    def get_scala_udf(udf_name: str, params: List) -> Column:
        """
        Returns expression of calling scala UDF as column

        :param udf_name: UDF name from `org.apache.spark.replay.utils.ScalaPySparkUDFs`
        :param params: list of UDF params in right order
        :return: column expression
        """
        sc = State().session.sparkContext
        scala_udf = getattr(sc._jvm.org.apache.spark.replay.utils.ScalaPySparkUDFs, udf_name)()
        return Column(scala_udf.apply(_to_seq(sc, params, _to_java_column)))
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
class RecOnlyMetric(Metric):
    """Base class for metrics that are calculated from recommendations alone, without holdout data"""

    @abstractmethod
    def __init__(self, log: DataFrameLike, *args, **kwargs):
        """
        :param log: DataFrame handed to the metric at construction time;
            what is extracted from it is up to the subclass
        """

    @abstractmethod
    def _get_enriched_recommendations(
        self,
        recommendations: DataFrameLike,
        ground_truth: Optional[DataFrameLike],
        max_k: int,
        ground_truth_users: Optional[DataFrameLike] = None,
    ) -> SparkDataFrame:
        """
        Prepare recommendations for the metric calculation.

        :param recommendations: predictions of a model
        :param ground_truth: ``__call__`` of this class always passes None here
        :param max_k: the largest of the requested cut-offs
        :param ground_truth_users: list of users to consider in metric calculation
        :return: dataframe consumed by the aggregation methods of the metric
        """

    def __call__(
        self,
        recommendations: DataFrameLike,
        k: IntOrList,
        ground_truth_users: Optional[DataFrameLike] = None,
    ) -> Union[Dict[int, NumType], NumType]:
        """
        :param recommendations: predictions of a model,
            DataFrame ``[user_idx, item_idx, relevance]``
        :param k: depth cut-off
        :param ground_truth_users: list of users to consider in metric calculation,
            forwarded to ``_get_enriched_recommendations``
        :return: metric value
        """
        largest_k = k if isinstance(k, int) else max(k)
        # There is no holdout data for this kind of metrics, hence ``None`` as ground truth.
        enriched = self._get_enriched_recommendations(
            recommendations,
            None,
            max_k=largest_k,
            ground_truth_users=ground_truth_users,
        )
        return self._mean(enriched, k)

    @staticmethod
    @abstractmethod
    def _get_metric_value_by_user(k, *args) -> float:
        """
        Calculate the metric for a single user.

        :param k: depth cut-off
        :param *args: extra parameters, returned by
            '''self._get_enriched_recommendations''' method
        :return: metric value for current user
        """
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
class NCISMetric(Metric):
    """
    RePlay implements Normalized Capped Importance Sampling for metric calculation with ``NCISMetric`` class.
    This method is mostly applied to RL-based recommendation systems to perform counterfactual evaluation, but could be
    used for any kind of recommender systems. See an article
    `Offline A/B testing for Recommender Systems <http://arxiv.org/abs/1801.07030>`_ for details.

    *Reward* (metric value for a user-item pair) is weighed by
    the ratio of *current policy score* (current relevance) on *previous policy score* (historical relevance).

    The *weight* is clipped by the *threshold* and put into interval :math:`[\\frac{1}{threshold}, threshold]`.
    Activation function (e.g. softmax, sigmoid) could be applied to the scores before weights calculation.

    Normalization weight for recommended item is calculated as follows:

    .. math::
        w_{ui} = \\frac{f(\\pi^t_{ui}, \\pi^t_u)}{f(\\pi^p_{ui}, \\pi^p_u)}

    Where:

    :math:`\\pi^t_{ui}` - current policy value (predicted relevance) of the user-item interaction

    :math:`\\pi^p_{ui}` - previous policy value (historical relevance) of the user-item interaction.
    Only values for user-item pairs present in current recommendations are used for calculation.

    :math:`\\pi_u` - all predicted /historical policy values for selected user :math:`u`

    :math:`f(\\pi_{ui}, \\pi_u)` - activation function applied to policy values (optional)

    :math:`w_{ui}` - weight of user-item interaction for normalized metric calculation before clipping


    Calculated weights are clipped as follows:

    .. math::
        \\hat{w_{ui}} = min(max(\\frac{1}{threshold}, w_{ui}), threshold)

    Normalization metric value for a user is calculated as follows:

    .. math::
        R_u = \\frac{r_{ui} \\hat{w_{ui}}}{\\sum_{i}\\hat{w_{ui}}}

    Where:

    :math:`r_{ui}` - metric value (reward) for user-item interaction

    :math:`R_u` - metric value (reward) for user :math:`u`

    Weight calculation is implemented in ``_get_enriched_recommendations`` method.
    """

    def __init__(
        self,
        prev_policy_weights: DataFrameLike,
        threshold: float = 10.0,
        activation: Optional[str] = None,
        use_scala_udf: bool = False,
    ):
        """
        :param prev_policy_weights: historical item of user-item relevance (previous policy values)
        :param threshold: capping threshold, applied after activation,
            relevance values are cropped to interval [1/`threshold`, `threshold`]
        :param activation: activation function, applied over relevance values.
            "logit"/"sigmoid", "softmax" or None
        :param use_scala_udf: compute per-user metric values with the Scala UDF
        :raises ValueError: if `activation` is not one of the supported values
            or `threshold` is not positive
        """
        # Arguments are validated before any Spark conversion is started.
        if activation is not None and activation not in ("logit", "sigmoid", "softmax"):
            msg = f"Unexpected `activation` - {activation}"
            raise ValueError(msg)
        if threshold <= 0:
            msg = "Threshold should be positive real number"
            raise ValueError(msg)

        super().__init__(use_scala_udf=use_scala_udf)
        self.prev_policy_weights = convert2spark(prev_policy_weights).withColumnRenamed("relevance", "prev_relevance")
        self.threshold = threshold
        self.activation = activation
        if activation == "softmax":
            self.logger.info(
                "For accurate softmax calculation pass only one `k` value in the NCISMetric metrics `call`"
            )

    @staticmethod
    def _softmax_by_user(df: SparkDataFrame, col_name: str) -> SparkDataFrame:
        """
        Subtract minimal value (relevance) by user from `col_name`
        and apply softmax by user to `col_name`.

        :param df: dataframe with ``user_idx`` and `col_name` columns
        :param col_name: column to transform in place
        :return: `df` with `col_name` replaced by its per-user softmax
        """
        return (
            df.withColumn(
                "_min_rel_user",
                sf.min(col_name).over(Window.partitionBy("user_idx")),
            )
            # after the shift every exponent is non-negative
            .withColumn(col_name, sf.exp(sf.col(col_name) - sf.col("_min_rel_user")))
            .withColumn(
                col_name,
                sf.col(col_name) / sf.sum(col_name).over(Window.partitionBy("user_idx")),
            )
            .drop("_min_rel_user")
        )

    @staticmethod
    def _sigmoid(df: SparkDataFrame, col_name: str) -> SparkDataFrame:
        """
        Apply sigmoid/logistic function to column `col_name`

        :param df: dataframe with `col_name` column
        :param col_name: column to transform in place
        :return: `df` with `col_name` replaced by ``1 / (1 + exp(-col_name))``
        """
        return df.withColumn(col_name, sf.lit(1.0) / (sf.lit(1.0) + sf.exp(-sf.col(col_name))))

    @staticmethod
    def _weigh_and_clip(
        df: SparkDataFrame,
        threshold: float,
        target_policy_col: str = "relevance",
        prev_policy_col: str = "prev_relevance",
    ):
        """
        Calculate weights as the ratio of the target policy score to the previous policy score
        and clip them to fit into interval [1/threshold, threshold].

        :param df: dataframe with ``user_idx``, ``item_idx``, ``relevance`` and both policy columns
        :param threshold: positive capping threshold
        :param target_policy_col: column with the current policy scores
        :param prev_policy_col: column with the previous policy scores
        :return: dataframe ``[user_idx, item_idx, relevance, weight]``
        """
        lower, upper = 1 / threshold, threshold
        return (
            df.withColumn(
                "weight_unbounded",
                sf.col(target_policy_col) / sf.col(prev_policy_col),
            )
            .withColumn(
                "weight",
                # a zero previous score makes the ratio undefined, such pairs get the maximal weight
                sf.when(sf.col(prev_policy_col) == sf.lit(0.0), sf.lit(upper))
                .when(sf.col("weight_unbounded") < sf.lit(lower), sf.lit(lower))
                .when(sf.col("weight_unbounded") > sf.lit(upper), sf.lit(upper))
                .otherwise(sf.col("weight_unbounded")),
            )
            .select("user_idx", "item_idx", "relevance", "weight")
        )

    def _reweighing(self, recommendations):
        """
        Apply the configured activation to both policy columns and calculate clipped weights.

        :param recommendations: dataframe with ``relevance`` and ``prev_relevance`` columns
        :return: dataframe ``[user_idx, item_idx, relevance, weight]``
        """
        if self.activation == "softmax":
            recommendations = self._softmax_by_user(recommendations, col_name="prev_relevance")
            recommendations = self._softmax_by_user(recommendations, col_name="relevance")
        elif self.activation in ["logit", "sigmoid"]:
            recommendations = self._sigmoid(recommendations, col_name="prev_relevance")
            recommendations = self._sigmoid(recommendations, col_name="relevance")

        return self._weigh_and_clip(recommendations, self.threshold)

    def _get_enriched_recommendations(
        self,
        recommendations: DataFrameLike,
        ground_truth: DataFrameLike,
        max_k: int,
        ground_truth_users: Optional[DataFrameLike] = None,
    ) -> SparkDataFrame:
        """
        Merge recommendations and ground truth into a single DataFrame
        and aggregate items into lists so that each user has only one record.

        :param recommendations: recommendation list
        :param ground_truth: test data
        :param max_k: maximal k value to calculate the metric for.
            `max_k` most relevant predictions are left for each user
        :param ground_truth_users: list of users to consider in metric calculation.
            if None, only the users from ground_truth are considered.
        :return: ``[user_idx, pred, weight, ground_truth]``
        """
        recommendations = convert2spark(recommendations)
        ground_truth = convert2spark(ground_truth)

        # Shared helper: users coming only from `ground_truth_users` get an empty
        # ground truth array instead of null, the same way as for the other metrics.
        true_items_by_users = preprocess_gt(ground_truth, ground_truth_users)

        # Previous policy scores are matched per item, and per user too when they are user specific.
        group_on = ["item_idx"]
        if "user_idx" in self.prev_policy_weights.columns:
            group_on.append("user_idx")
        recommendations = get_top_k_recs(recommendations, k=max_k)

        # Pairs unseen by the previous policy get a zero score (and the maximal weight later on).
        recommendations = recommendations.join(self.prev_policy_weights, on=group_on, how="left").na.fill(
            0.0, subset=["prev_relevance"]
        )

        recommendations = self._reweighing(recommendations)

        weight_type = recommendations.schema["weight"].dataType
        item_type = ground_truth.schema["item_idx"].dataType

        recommendations = filter_sort(recommendations, "weight")

        recommendations = recommendations.join(true_items_by_users, how="right", on=["user_idx"])
        return fill_na_with_empty_array(
            fill_na_with_empty_array(recommendations, "pred", item_type),
            "weight",
            weight_type,
        )
|
|
@@ -1,97 +0,0 @@
|
|
|
1
|
-
from typing import Dict, Optional, Union
|
|
2
|
-
|
|
3
|
-
from replay.utils import PYSPARK_AVAILABLE, DataFrameLike, IntOrList, NumType, SparkDataFrame
|
|
4
|
-
from replay.utils.spark_utils import convert2spark
|
|
5
|
-
|
|
6
|
-
from .base_metric import RecOnlyMetric, process_k
|
|
7
|
-
|
|
8
|
-
if PYSPARK_AVAILABLE:
|
|
9
|
-
from pyspark.sql import (
|
|
10
|
-
Window,
|
|
11
|
-
functions as sf,
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class Coverage(RecOnlyMetric):
    """
    Metric calculation is as follows:

    * take ``K`` recommendations with the biggest ``relevance`` for each ``user_id``
    * count the number of distinct ``item_id`` in these recommendations
    * divide it by the number of items in the whole data set

    """

    def __init__(self, log: DataFrameLike):
        """
        :param log: pandas or Spark DataFrame
            It is important for ``log`` to contain all available items.
        """
        self.items = convert2spark(log).select("item_idx").distinct()
        # Counted once at construction time, used as the denominator of the metric.
        self.item_count = self.items.count()

    @staticmethod
    def _get_metric_value_by_user(k, *args):
        # not averaged by users
        pass

    def _get_enriched_recommendations(
        self,
        recommendations: DataFrameLike,
        ground_truth: DataFrameLike,  # noqa: ARG002
        max_k: int,  # noqa: ARG002
        ground_truth_users: Optional[DataFrameLike] = None,
    ) -> SparkDataFrame:
        """
        Convert recommendations to Spark and optionally restrict them to the given users.

        :param recommendations: recommendation list
        :param ground_truth: not used
        :param max_k: not used, truncation to the cut-off happens in ``_mean``
        :param ground_truth_users: if given, only recommendations of these users are kept
        :return: recommendations as a Spark DataFrame
        """
        recommendations = convert2spark(recommendations)
        if ground_truth_users is not None:
            ground_truth_users = convert2spark(ground_truth_users)
            return recommendations.join(ground_truth_users, on="user_idx", how="inner")
        return recommendations

    def _conf_interval(
        self,
        recs: DataFrameLike,  # noqa: ARG002
        k_list: IntOrList,
        alpha: float = 0.95,  # noqa: ARG002
    ) -> Union[Dict[int, float], float]:
        """
        The metric is not averaged by users, so the interval is always reported as zero.

        :param recs: not used
        :param k_list: single cut-off or list of cut-offs
        :param alpha: not used
        :return: ``0.0`` for an integer cut-off, ``{k: 0.0}`` for a list of cut-offs
        """
        if isinstance(k_list, int):
            return 0.0
        return {i: 0.0 for i in k_list}

    def _median(
        self,
        recs: DataFrameLike,
        k_list: IntOrList,
    ) -> Union[Dict[int, NumType], NumType]:
        """
        Same value as ``_mean``: there is a single value per cut-off, not a distribution by users.

        :param recs: recommendations
        :param k_list: single cut-off or list of cut-offs
        :return: coverage for an integer cut-off, ``{k: coverage}`` for a list of cut-offs
        """
        return self._mean(recs, k_list)

    @process_k
    def _mean(
        self,
        recs: SparkDataFrame,
        k_list: list,
    ) -> Union[Dict[int, NumType], NumType]:
        """
        Share of the known items that appear among the top-k recommendations of at least one user.

        :param recs: recommendations ``[user_idx, item_idx, relevance]``
        :param k_list: cut-offs to calculate the metric for
        :return: ``{k: coverage}``
        """
        unknown_item_count = recs.select("item_idx").distinct().exceptAll(self.items).count()
        if unknown_item_count > 0:
            self.logger.warning(
                "Recommendations contain items that were not present in the log. "
                r"The resulting metric value can be more than 1.0 ¯\_(ツ)_/¯"
            )

        # The best (smallest) position of every item over all users is calculated once
        # and cached, then each cut-off only needs a filter and a count.
        best_positions = (
            recs.withColumn(
                "row_num",
                sf.row_number().over(Window.partitionBy("user_idx").orderBy(sf.desc("relevance"))),
            )
            .select("item_idx", "row_num")
            .groupBy("item_idx")
            .agg(sf.min("row_num").alias("best_position"))
            .cache()
        )

        try:
            return {
                current_k: best_positions.filter(sf.col("best_position") <= current_k).count() / self.item_count
                for current_k in k_list
            }
        finally:
            # Release the cached frame even when one of the count jobs fails.
            best_positions.unpersist()
|