replay-rec 0.16.0rc0__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- replay/__init__.py +1 -1
- replay/data/__init__.py +1 -1
- replay/data/dataset.py +45 -42
- replay/data/dataset_utils/dataset_label_encoder.py +6 -7
- replay/data/nn/__init__.py +1 -1
- replay/data/nn/schema.py +20 -33
- replay/data/nn/sequence_tokenizer.py +217 -87
- replay/data/nn/sequential_dataset.py +6 -22
- replay/data/nn/torch_sequential_dataset.py +20 -11
- replay/data/nn/utils.py +7 -9
- replay/data/schema.py +17 -17
- replay/data/spark_schema.py +0 -1
- replay/metrics/base_metric.py +38 -79
- replay/metrics/categorical_diversity.py +24 -58
- replay/metrics/coverage.py +25 -49
- replay/metrics/descriptors.py +4 -13
- replay/metrics/experiment.py +3 -8
- replay/metrics/hitrate.py +3 -6
- replay/metrics/map.py +3 -6
- replay/metrics/mrr.py +1 -4
- replay/metrics/ndcg.py +4 -7
- replay/metrics/novelty.py +10 -29
- replay/metrics/offline_metrics.py +26 -61
- replay/metrics/precision.py +3 -6
- replay/metrics/recall.py +3 -6
- replay/metrics/rocauc.py +7 -10
- replay/metrics/surprisal.py +13 -30
- replay/metrics/torch_metrics_builder.py +0 -4
- replay/metrics/unexpectedness.py +15 -20
- replay/models/__init__.py +1 -2
- replay/models/als.py +7 -15
- replay/models/association_rules.py +12 -28
- replay/models/base_neighbour_rec.py +21 -36
- replay/models/base_rec.py +92 -215
- replay/models/cat_pop_rec.py +9 -22
- replay/models/cluster.py +17 -28
- replay/models/extensions/ann/ann_mixin.py +7 -12
- replay/models/extensions/ann/entities/base_hnsw_param.py +1 -1
- replay/models/extensions/ann/entities/hnswlib_param.py +0 -6
- replay/models/extensions/ann/entities/nmslib_hnsw_param.py +0 -6
- replay/models/extensions/ann/index_builders/driver_hnswlib_index_builder.py +4 -10
- replay/models/extensions/ann/index_builders/driver_nmslib_index_builder.py +7 -11
- replay/models/extensions/ann/index_builders/executor_hnswlib_index_builder.py +5 -12
- replay/models/extensions/ann/index_builders/executor_nmslib_index_builder.py +11 -18
- replay/models/extensions/ann/index_builders/nmslib_index_builder_mixin.py +1 -4
- replay/models/extensions/ann/index_inferers/base_inferer.py +3 -10
- replay/models/extensions/ann/index_inferers/hnswlib_filter_index_inferer.py +7 -17
- replay/models/extensions/ann/index_inferers/hnswlib_index_inferer.py +6 -14
- replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py +14 -28
- replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py +15 -25
- replay/models/extensions/ann/index_inferers/utils.py +2 -9
- replay/models/extensions/ann/index_stores/hdfs_index_store.py +4 -9
- replay/models/extensions/ann/index_stores/shared_disk_index_store.py +2 -6
- replay/models/extensions/ann/index_stores/spark_files_index_store.py +8 -14
- replay/models/extensions/ann/index_stores/utils.py +5 -2
- replay/models/extensions/ann/utils.py +3 -5
- replay/models/kl_ucb.py +16 -22
- replay/models/knn.py +37 -59
- replay/models/nn/optimizer_utils/__init__.py +1 -6
- replay/models/nn/optimizer_utils/optimizer_factory.py +3 -6
- replay/models/nn/sequential/bert4rec/__init__.py +1 -1
- replay/models/nn/sequential/bert4rec/dataset.py +6 -7
- replay/models/nn/sequential/bert4rec/lightning.py +53 -56
- replay/models/nn/sequential/bert4rec/model.py +12 -25
- replay/models/nn/sequential/callbacks/__init__.py +1 -1
- replay/models/nn/sequential/callbacks/prediction_callbacks.py +23 -25
- replay/models/nn/sequential/callbacks/validation_callback.py +27 -30
- replay/models/nn/sequential/postprocessors/postprocessors.py +1 -1
- replay/models/nn/sequential/sasrec/dataset.py +8 -7
- replay/models/nn/sequential/sasrec/lightning.py +53 -48
- replay/models/nn/sequential/sasrec/model.py +4 -17
- replay/models/pop_rec.py +9 -10
- replay/models/query_pop_rec.py +7 -15
- replay/models/random_rec.py +10 -18
- replay/models/slim.py +8 -13
- replay/models/thompson_sampling.py +13 -14
- replay/models/ucb.py +11 -22
- replay/models/wilson.py +5 -14
- replay/models/word2vec.py +24 -69
- replay/optimization/optuna_objective.py +13 -27
- replay/preprocessing/__init__.py +1 -2
- replay/preprocessing/converter.py +2 -7
- replay/preprocessing/filters.py +67 -142
- replay/preprocessing/history_based_fp.py +44 -116
- replay/preprocessing/label_encoder.py +106 -68
- replay/preprocessing/sessionizer.py +1 -11
- replay/scenarios/fallback.py +3 -8
- replay/splitters/base_splitter.py +43 -15
- replay/splitters/cold_user_random_splitter.py +18 -31
- replay/splitters/k_folds.py +14 -24
- replay/splitters/last_n_splitter.py +33 -43
- replay/splitters/new_users_splitter.py +31 -55
- replay/splitters/random_splitter.py +16 -23
- replay/splitters/ratio_splitter.py +30 -54
- replay/splitters/time_splitter.py +13 -18
- replay/splitters/two_stage_splitter.py +44 -79
- replay/utils/__init__.py +1 -1
- replay/utils/common.py +65 -0
- replay/utils/dataframe_bucketizer.py +25 -31
- replay/utils/distributions.py +3 -15
- replay/utils/model_handler.py +36 -33
- replay/utils/session_handler.py +11 -15
- replay/utils/spark_utils.py +51 -85
- replay/utils/time.py +8 -22
- replay/utils/types.py +1 -3
- {replay_rec-0.16.0rc0.dist-info → replay_rec-0.17.0.dist-info}/METADATA +2 -10
- replay_rec-0.17.0.dist-info/RECORD +127 -0
- {replay_rec-0.16.0rc0.dist-info → replay_rec-0.17.0.dist-info}/WHEEL +1 -1
- replay/experimental/__init__.py +0 -0
- replay/experimental/metrics/__init__.py +0 -61
- replay/experimental/metrics/base_metric.py +0 -661
- replay/experimental/metrics/coverage.py +0 -117
- replay/experimental/metrics/experiment.py +0 -200
- replay/experimental/metrics/hitrate.py +0 -27
- replay/experimental/metrics/map.py +0 -31
- replay/experimental/metrics/mrr.py +0 -19
- replay/experimental/metrics/ncis_precision.py +0 -32
- replay/experimental/metrics/ndcg.py +0 -50
- replay/experimental/metrics/precision.py +0 -23
- replay/experimental/metrics/recall.py +0 -26
- replay/experimental/metrics/rocauc.py +0 -50
- replay/experimental/metrics/surprisal.py +0 -102
- replay/experimental/metrics/unexpectedness.py +0 -74
- replay/experimental/models/__init__.py +0 -10
- replay/experimental/models/admm_slim.py +0 -216
- replay/experimental/models/base_neighbour_rec.py +0 -222
- replay/experimental/models/base_rec.py +0 -1361
- replay/experimental/models/base_torch_rec.py +0 -247
- replay/experimental/models/cql.py +0 -468
- replay/experimental/models/ddpg.py +0 -1007
- replay/experimental/models/dt4rec/__init__.py +0 -0
- replay/experimental/models/dt4rec/dt4rec.py +0 -193
- replay/experimental/models/dt4rec/gpt1.py +0 -411
- replay/experimental/models/dt4rec/trainer.py +0 -128
- replay/experimental/models/dt4rec/utils.py +0 -274
- replay/experimental/models/extensions/spark_custom_models/__init__.py +0 -0
- replay/experimental/models/extensions/spark_custom_models/als_extension.py +0 -733
- replay/experimental/models/implicit_wrap.py +0 -138
- replay/experimental/models/lightfm_wrap.py +0 -327
- replay/experimental/models/mult_vae.py +0 -374
- replay/experimental/models/neuromf.py +0 -462
- replay/experimental/models/scala_als.py +0 -311
- replay/experimental/nn/data/__init__.py +0 -1
- replay/experimental/nn/data/schema_builder.py +0 -58
- replay/experimental/preprocessing/__init__.py +0 -3
- replay/experimental/preprocessing/data_preparator.py +0 -929
- replay/experimental/preprocessing/padder.py +0 -231
- replay/experimental/preprocessing/sequence_generator.py +0 -218
- replay/experimental/scenarios/__init__.py +0 -1
- replay/experimental/scenarios/obp_wrapper/__init__.py +0 -8
- replay/experimental/scenarios/obp_wrapper/obp_optuna_objective.py +0 -86
- replay/experimental/scenarios/obp_wrapper/replay_offline.py +0 -271
- replay/experimental/scenarios/obp_wrapper/utils.py +0 -88
- replay/experimental/scenarios/two_stages/reranker.py +0 -116
- replay/experimental/scenarios/two_stages/two_stages_scenario.py +0 -843
- replay/experimental/utils/__init__.py +0 -0
- replay/experimental/utils/logger.py +0 -24
- replay/experimental/utils/model_handler.py +0 -213
- replay/experimental/utils/session_handler.py +0 -47
- replay_rec-0.16.0rc0.dist-info/NOTICE +0 -41
- replay_rec-0.16.0rc0.dist-info/RECORD +0 -178
- {replay_rec-0.16.0rc0.dist-info → replay_rec-0.17.0.dist-info}/LICENSE +0 -0
replay/utils/time.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
|
|
3
3
|
from .spark_utils import convert2spark
|
|
4
|
-
|
|
5
4
|
from .types import PYSPARK_AVAILABLE, DataFrameLike
|
|
6
5
|
|
|
7
6
|
if PYSPARK_AVAILABLE:
|
|
@@ -106,9 +105,7 @@ def get_item_recency(
|
|
|
106
105
|
"item_idx",
|
|
107
106
|
sf.unix_timestamp(sf.to_timestamp("timestamp")).alias("timestamp"),
|
|
108
107
|
)
|
|
109
|
-
items = items.groupBy("item_idx").agg(
|
|
110
|
-
sf.mean("timestamp").alias("timestamp")
|
|
111
|
-
)
|
|
108
|
+
items = items.groupBy("item_idx").agg(sf.mean("timestamp").alias("timestamp"))
|
|
112
109
|
items = items.withColumn("relevance", sf.lit(1))
|
|
113
110
|
items = smoothe_time(items, decay, limit, kind)
|
|
114
111
|
return items
|
|
@@ -234,16 +231,10 @@ def smoothe_time(
|
|
|
234
231
|
<BLANKLINE>
|
|
235
232
|
"""
|
|
236
233
|
log = convert2spark(log)
|
|
237
|
-
log = log.withColumn(
|
|
238
|
-
"timestamp", sf.unix_timestamp(sf.to_timestamp("timestamp"))
|
|
239
|
-
)
|
|
240
|
-
last_date = (
|
|
241
|
-
log.agg({"timestamp": "max"}).collect()[0].asDict()["max(timestamp)"]
|
|
242
|
-
)
|
|
234
|
+
log = log.withColumn("timestamp", sf.unix_timestamp(sf.to_timestamp("timestamp")))
|
|
235
|
+
last_date = log.agg({"timestamp": "max"}).collect()[0].asDict()["max(timestamp)"]
|
|
243
236
|
day_in_secs = 86400
|
|
244
|
-
log = log.withColumn(
|
|
245
|
-
"age", (last_date - sf.col("timestamp")) / day_in_secs
|
|
246
|
-
)
|
|
237
|
+
log = log.withColumn("age", (last_date - sf.col("timestamp")) / day_in_secs)
|
|
247
238
|
if kind == "power":
|
|
248
239
|
power = np.log(0.5) / np.log(decay)
|
|
249
240
|
log = log.withColumn("age", sf.pow(sf.col("age") + 1, power))
|
|
@@ -254,15 +245,10 @@ def smoothe_time(
|
|
|
254
245
|
k = 0.5 / decay
|
|
255
246
|
log = log.withColumn("age", 1 - k * sf.col("age"))
|
|
256
247
|
else:
|
|
257
|
-
raise ValueError(
|
|
258
|
-
f"parameter kind must be one of [power, exp, linear], got {kind}"
|
|
259
|
-
)
|
|
248
|
+
msg = f"parameter kind must be one of [power, exp, linear], got {kind}"
|
|
249
|
+
raise ValueError(msg)
|
|
260
250
|
|
|
261
|
-
log = log.withColumn(
|
|
262
|
-
"age", sf.when(sf.col("age") < limit, limit).otherwise(sf.col("age"))
|
|
263
|
-
)
|
|
264
|
-
log = log.withColumn(
|
|
265
|
-
"relevance", sf.col("relevance") * sf.col("age")
|
|
266
|
-
).drop("age")
|
|
251
|
+
log = log.withColumn("age", sf.when(sf.col("age") < limit, limit).otherwise(sf.col("age")))
|
|
252
|
+
log = log.withColumn("relevance", sf.col("relevance") * sf.col("age")).drop("age")
|
|
267
253
|
log = log.withColumn("timestamp", sf.to_timestamp("timestamp"))
|
|
268
254
|
return log
|
replay/utils/types.py
CHANGED
|
@@ -4,7 +4,6 @@ from pandas import DataFrame as PandasDataFrame
|
|
|
4
4
|
from polars import DataFrame as PolarsDataFrame
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
# pylint: disable=too-few-public-methods
|
|
8
7
|
class MissingImportType:
|
|
9
8
|
"""
|
|
10
9
|
Replacement class with missing import
|
|
@@ -17,10 +16,9 @@ try:
|
|
|
17
16
|
PYSPARK_AVAILABLE = True
|
|
18
17
|
except ImportError:
|
|
19
18
|
PYSPARK_AVAILABLE = False
|
|
20
|
-
SparkDataFrame = MissingImportType
|
|
19
|
+
SparkDataFrame = MissingImportType
|
|
21
20
|
|
|
22
21
|
try:
|
|
23
|
-
# pylint: disable=unused-import
|
|
24
22
|
import torch # noqa: F401
|
|
25
23
|
|
|
26
24
|
TORCH_AVAILABLE = True
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: replay-rec
|
|
3
|
-
Version: 0.16.0rc0
|
|
3
|
+
Version: 0.17.0
|
|
4
4
|
Summary: RecSys Library
|
|
5
5
|
Home-page: https://sb-ai-lab.github.io/RePlay/
|
|
6
6
|
License: Apache-2.0
|
|
@@ -20,25 +20,17 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
20
20
|
Provides-Extra: all
|
|
21
21
|
Provides-Extra: spark
|
|
22
22
|
Provides-Extra: torch
|
|
23
|
-
Requires-Dist: d3rlpy (>=2.0.4,<3.0.0)
|
|
24
|
-
Requires-Dist: gym (>=0.26.0,<0.27.0)
|
|
25
23
|
Requires-Dist: hnswlib (==0.7.0)
|
|
26
|
-
Requires-Dist: implicit (>=0.7.0,<0.8.0)
|
|
27
|
-
Requires-Dist: lightautoml (>=0.3.1,<0.4.0)
|
|
28
|
-
Requires-Dist: lightfm (==1.17)
|
|
29
24
|
Requires-Dist: lightning (>=2.0.2,<3.0.0) ; extra == "torch" or extra == "all"
|
|
30
|
-
Requires-Dist: llvmlite (>=0.32.1)
|
|
31
25
|
Requires-Dist: nmslib (==2.1.1)
|
|
32
|
-
Requires-Dist: numba (>=0.50)
|
|
33
26
|
Requires-Dist: numpy (>=1.20.0)
|
|
34
27
|
Requires-Dist: optuna (>=3.2.0,<3.3.0)
|
|
35
28
|
Requires-Dist: pandas (>=1.3.5,<2.0.0)
|
|
36
29
|
Requires-Dist: polars (>=0.20.7,<0.21.0)
|
|
37
30
|
Requires-Dist: psutil (>=5.9.5,<5.10.0)
|
|
38
|
-
Requires-Dist: pyarrow (>=12.0.1
|
|
31
|
+
Requires-Dist: pyarrow (>=12.0.1)
|
|
39
32
|
Requires-Dist: pyspark (>=3.0,<3.3) ; extra == "spark" or extra == "all"
|
|
40
33
|
Requires-Dist: pytorch-ranger (>=0.1.1,<0.2.0) ; extra == "torch" or extra == "all"
|
|
41
|
-
Requires-Dist: sb-obp (>=0.5.7,<0.6.0)
|
|
42
34
|
Requires-Dist: scikit-learn (>=1.0.2,<2.0.0)
|
|
43
35
|
Requires-Dist: scipy (>=1.8.1,<1.9.0)
|
|
44
36
|
Requires-Dist: torch (>=1.8,<2.0) ; extra == "torch" or extra == "all"
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
replay/__init__.py,sha256=knHFGIzZuD7qKkGHgDbbA_GnYg1omuXxLDS4JVvYEdk,46
|
|
2
|
+
replay/data/__init__.py,sha256=g5bKRyF76QL_BqlED-31RnS8pBdcyj9loMsx5vAG_0E,301
|
|
3
|
+
replay/data/dataset.py,sha256=ysMTNfx8I2hI9fSugtt3IPhenmutgzQMw-8VcM3oUJk,21299
|
|
4
|
+
replay/data/dataset_utils/__init__.py,sha256=9wUvG8ZwGUvuzLU4zQI5FDcH0WVVo5YLN2ey3DterP0,55
|
|
5
|
+
replay/data/dataset_utils/dataset_label_encoder.py,sha256=TEx2zLw5rJdIz1SRBEznyVv5x_Cs7o6QQbzMk-M1LU0,9598
|
|
6
|
+
replay/data/nn/__init__.py,sha256=WxLsi4rgOuuvGYHN49xBPxP2Srhqf3NYgfBDVH-ZvBo,1122
|
|
7
|
+
replay/data/nn/schema.py,sha256=BYU65vLqPDl69OE-rReh59fiQK0ERfs1xbBLWCiIJnw,14258
|
|
8
|
+
replay/data/nn/sequence_tokenizer.py,sha256=dXD8l7IfK1dod8p--I6BhvE9af3iUOfpaoW2QBU9hTs,34133
|
|
9
|
+
replay/data/nn/sequential_dataset.py,sha256=fqlyBAzDmpH332S-LoMP9PoRYMtgZpxG6Qdahmk5GtE,7840
|
|
10
|
+
replay/data/nn/torch_sequential_dataset.py,sha256=BqrK_PtkhpsaY1zRIWGk4EgwPL31a7IWCc0hLDuwDQc,10984
|
|
11
|
+
replay/data/nn/utils.py,sha256=YKE9gkIHZDDiwv4THqOWL4PzsdOujnPuM97v79Mwq0E,2769
|
|
12
|
+
replay/data/schema.py,sha256=F_cv6sYb6l23yuX5xWnbqoJ9oSeUT2NpIM19u8Lf2jA,15606
|
|
13
|
+
replay/data/spark_schema.py,sha256=4o0Kn_fjwz2-9dBY3q46F9PL0F3E7jdVpIlX7SG3OZI,1111
|
|
14
|
+
replay/metrics/__init__.py,sha256=KDkxVnKa4ks9K9GmlrdTx1pkIl-MAmm78ZASsp2ZndE,2812
|
|
15
|
+
replay/metrics/base_metric.py,sha256=uleW5vLrdA3iRx72tFyW0cxe6ne_ugQ1XaY_ZTcnAOo,15960
|
|
16
|
+
replay/metrics/categorical_diversity.py,sha256=OYsF-Ng-WrF9CC-sKgQKngrA779NO8MtgRvvAyC8MXM,10781
|
|
17
|
+
replay/metrics/coverage.py,sha256=wE1Y_TgKOzf_9ixeas-vsxANAHeHSGPuGrzKk8DklaY,8843
|
|
18
|
+
replay/metrics/descriptors.py,sha256=BHORyGKfJgPeUjgLO0u2urSTe16UQbb-HHh8soqnwDE,3893
|
|
19
|
+
replay/metrics/experiment.py,sha256=Ab43e-eyoy8RM_OgAcm0toqFNDaWkh5Gbq_XVxbmqB0,8142
|
|
20
|
+
replay/metrics/hitrate.py,sha256=G0_GufGc1RIQ_RntCIvDn0piJIVyT3W35bTN-Un9-Ug,2339
|
|
21
|
+
replay/metrics/map.py,sha256=qUPFkyrvRf7gEyxosBApHOxpAVZJ3Yksqe4vsbzTukE,2597
|
|
22
|
+
replay/metrics/mrr.py,sha256=AkfRdvL21Ri1zE4Cmib2twJaEaeBO5wze87eZ4lkYlc,2188
|
|
23
|
+
replay/metrics/ndcg.py,sha256=wzdFFf9rmG6yH7klK5V9ab1wNdI6CHXezvYasxO9_ZU,3134
|
|
24
|
+
replay/metrics/novelty.py,sha256=4S6PsdOl8lqweBlemWJh2fVqMiN8jfj3cIU-9TxZXLY,5500
|
|
25
|
+
replay/metrics/offline_metrics.py,sha256=bVI47C-oTxyi-ksTEdTckX3fT_Dz7koi0ICTgUmsnRM,20468
|
|
26
|
+
replay/metrics/precision.py,sha256=8hfz9bLZt7wUfyJtJPiIoDEOP_bweZSpCS6Z1Cm9IGE,2281
|
|
27
|
+
replay/metrics/recall.py,sha256=ATN5OIeFKMG7-nqpPCBPLItrUhGRucLCELWnzRyCRoo,2472
|
|
28
|
+
replay/metrics/rocauc.py,sha256=gybFcyIyVDpwoR9K38nOSxlAjIYbFqXUzABuckxqHps,3307
|
|
29
|
+
replay/metrics/surprisal.py,sha256=wj9Q5mAdECpl0LfykJWt8jgN3_CUSlai2fhiFgJr_Vw,7474
|
|
30
|
+
replay/metrics/torch_metrics_builder.py,sha256=2gcCcb0A-TVpYcBIYGhXrggyFX-M_T7Q1pQUiMpxEZE,13845
|
|
31
|
+
replay/metrics/unexpectedness.py,sha256=cfDnkpK6nPeawwHDVNQAkUtsW0SvAttI84k4M5ttkyo,6888
|
|
32
|
+
replay/models/__init__.py,sha256=_4gNsauyrVMYEoFDihPYY9kGuBGGFyy1krvxF7oEYjk,808
|
|
33
|
+
replay/models/als.py,sha256=dpBwyg1ZBqtdgrFluHaq5nuPQT---fmA-N2TspJAM0U,6232
|
|
34
|
+
replay/models/association_rules.py,sha256=cp4myXvMqro6zLMjJzJMb0DZ5DQFQEZvhqf5OBgBw8Y,14659
|
|
35
|
+
replay/models/base_neighbour_rec.py,sha256=zMORSm4uMQSNj12v0n_6w8fVHgSYjeiqyYE9rrWgSfU,7887
|
|
36
|
+
replay/models/base_rec.py,sha256=iF0eMlNQVcd-nb3aCRG3ObpmEi7P4-jP_5mKjwc6anc,66407
|
|
37
|
+
replay/models/cat_pop_rec.py,sha256=tzI1UMlC3kEOrtDZ1UPpCP13tX8CeDJP7PHwQKl9Mmo,11922
|
|
38
|
+
replay/models/cluster.py,sha256=9JcpGnbfgFa4UsyxPAa4WMuJFa3rsuAxiKoy-s_UfyE,4970
|
|
39
|
+
replay/models/extensions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
|
+
replay/models/extensions/ann/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
|
+
replay/models/extensions/ann/ann_mixin.py,sha256=30YTIGnRhGJYOnxo2bM6dQFgDq9i5UH4t8Ln8zCW8TM,7288
|
|
42
|
+
replay/models/extensions/ann/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
|
+
replay/models/extensions/ann/entities/base_hnsw_param.py,sha256=5GRdcQj4-zhNXfJ7ko2WHGHgRuXCzSHCRcRxljl1V4c,776
|
|
44
|
+
replay/models/extensions/ann/entities/hnswlib_param.py,sha256=j3V4JXM_yfR6s2TjYakIXMg-zS1-MrP6an930DEIWGM,2104
|
|
45
|
+
replay/models/extensions/ann/entities/nmslib_hnsw_param.py,sha256=WeEhRR4jKqgvWK_zDK8fx6kEqc2e_bc0kubvqK3iV8c,2162
|
|
46
|
+
replay/models/extensions/ann/index_builders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
|
+
replay/models/extensions/ann/index_builders/base_index_builder.py,sha256=Ul25G0FaNLOXUjrDXxZDTg7tLXlv1N6wR8kWjWICtZ0,2110
|
|
48
|
+
replay/models/extensions/ann/index_builders/driver_hnswlib_index_builder.py,sha256=bUoDwelKL9OK4z0IABzDm16t8wQ4QzH3wQ5NP32S_IE,1588
|
|
49
|
+
replay/models/extensions/ann/index_builders/driver_nmslib_index_builder.py,sha256=1NLWyAJGYgp46uUBhUYQyd0stmG6DhLh7U4JEne5TFw,1308
|
|
50
|
+
replay/models/extensions/ann/index_builders/executor_hnswlib_index_builder.py,sha256=_JZfewcLsU3sRR95_FPlgLfn9vez0JXMxer_024UHK4,2449
|
|
51
|
+
replay/models/extensions/ann/index_builders/executor_nmslib_index_builder.py,sha256=_TClG_czFb50dXyUjjXL4HE5j_i4BH011wjzDGba2U8,2591
|
|
52
|
+
replay/models/extensions/ann/index_builders/nmslib_index_builder_mixin.py,sha256=AIkVnobesnTM5lrBSWf9gd0CySwFQ0vH_DjemfLS4Cs,1925
|
|
53
|
+
replay/models/extensions/ann/index_inferers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
+
replay/models/extensions/ann/index_inferers/base_inferer.py,sha256=I39aqEc2somfndrCd-KC3XYZnYSrJ2hGpR9y6wO93NA,2524
|
|
55
|
+
replay/models/extensions/ann/index_inferers/hnswlib_filter_index_inferer.py,sha256=JjT4l_XAjzUOsTAE7OS88zAgPd_h_O44oUnn2kVr8E0,2477
|
|
56
|
+
replay/models/extensions/ann/index_inferers/hnswlib_index_inferer.py,sha256=CoY_oMfdcwnh87ceuSpHXu4Czle9xxeMisO8XJUuJLE,1717
|
|
57
|
+
replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py,sha256=1bpBjRhj4J_ecaORRhkhEke7ImJcxVTFRmmGK2wISB4,3120
|
|
58
|
+
replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py,sha256=TqyunbjMQp1bWltbouvqK2kr2cnER6_d75NuCTVB3O0,2195
|
|
59
|
+
replay/models/extensions/ann/index_inferers/utils.py,sha256=6IST2FPSY3nuYu5KqzRpd4FgdaV3GnQRQlxp9LN_yyA,641
|
|
60
|
+
replay/models/extensions/ann/index_stores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
61
|
+
replay/models/extensions/ann/index_stores/base_index_store.py,sha256=u4l2ybAXX92ZMGK7NqqosbKF75QgFqhAMVadd5ePj6Y,910
|
|
62
|
+
replay/models/extensions/ann/index_stores/hdfs_index_store.py,sha256=0zDq9PdDOiD6HvtZlfjTbuJHfeTOWBTQ_HiuqZmoxtA,3090
|
|
63
|
+
replay/models/extensions/ann/index_stores/shared_disk_index_store.py,sha256=thl4T1uYU4Gtk4nBao_qK8CbFBdX1xmXNishxgfCd-I,2030
|
|
64
|
+
replay/models/extensions/ann/index_stores/spark_files_index_store.py,sha256=QP_8mE7EIBbePIe0AB-IWuJLRA5MR3wswCEt8oHzn-0,3617
|
|
65
|
+
replay/models/extensions/ann/index_stores/utils.py,sha256=6r2GP_EFCaCguolW857pb4lRS8rh6_Nv_Edso9_j5no,3756
|
|
66
|
+
replay/models/extensions/ann/utils.py,sha256=qLeT0wHzbzBU5J6SE3xbbT3vYrLrFcEEQNJCtElvP_U,1213
|
|
67
|
+
replay/models/kl_ucb.py,sha256=L6vC2KsTBTTx4ckmGhWybOiLa5Wt54N7cgl7jS2FQRg,6731
|
|
68
|
+
replay/models/knn.py,sha256=ujJRiRJ9Nfs_m7eTvk0fDTYiR95aKnDO4e2Tf2q2AQI,9109
|
|
69
|
+
replay/models/nn/__init__.py,sha256=AT3o1qXaxUq4_QIGlcGuSs54ZpueOo-SbpZwuGI-6os,41
|
|
70
|
+
replay/models/nn/optimizer_utils/__init__.py,sha256=8MHln7CW54oACVUFKdZLjAf4bY83GcOMXpmL87gTnhI,178
|
|
71
|
+
replay/models/nn/optimizer_utils/optimizer_factory.py,sha256=EXqS_2ZxT7K_kFzcjgFpNhmzUoSftf_CwZIIHNHZBgU,2842
|
|
72
|
+
replay/models/nn/sequential/__init__.py,sha256=CI2n0cxs_amqJrwBMq6n0Z_uBOu7CGXfagqvE4Jlmjw,128
|
|
73
|
+
replay/models/nn/sequential/bert4rec/__init__.py,sha256=JfZqHOGxcvOkICl5cWmZbZhaKXpkIvua-Wj57VWWEhw,399
|
|
74
|
+
replay/models/nn/sequential/bert4rec/dataset.py,sha256=sCnYGF-sQ1YlLq7vofQo2GIIlc59YlbUgmW7bHI6MPg,10324
|
|
75
|
+
replay/models/nn/sequential/bert4rec/lightning.py,sha256=TqO0V-g0JA0D-L2t08AgAIQgBkDtLUgl4xqekSiDWJ4,22605
|
|
76
|
+
replay/models/nn/sequential/bert4rec/model.py,sha256=tiAiKOUwk3iPPYWyWkfOF23IzfL1NbeaF-8kNt9uZlU,21303
|
|
77
|
+
replay/models/nn/sequential/callbacks/__init__.py,sha256=Q7mSZ_RB6iyD7QZaBL_NJ0uh8cRfgxq7gtPHbkSyhoo,282
|
|
78
|
+
replay/models/nn/sequential/callbacks/prediction_callbacks.py,sha256=H4MZ87_N0hCKtHbsTuN-Cq_SJ-n9TSkvv2okuGnwo3M,9045
|
|
79
|
+
replay/models/nn/sequential/callbacks/validation_callback.py,sha256=6TNl3NN9oahK1J7DT44461xqBuUCblCsLzUi2svlhF4,5825
|
|
80
|
+
replay/models/nn/sequential/postprocessors/__init__.py,sha256=89LGzkNHukcuC2-rfpiz7vmv1zyk6MNY-8zaXrvtn0M,164
|
|
81
|
+
replay/models/nn/sequential/postprocessors/_base.py,sha256=Z-HuXM4WBQtrq9IeCL00jtdb_UopksInxVVK8lxD_hA,1136
|
|
82
|
+
replay/models/nn/sequential/postprocessors/postprocessors.py,sha256=V32xMyNPztJ5kapUxYSAz9i_vsqJ6_cjpGblmQGPO6A,6781
|
|
83
|
+
replay/models/nn/sequential/sasrec/__init__.py,sha256=c6130lRpPkcbuGgkM7slagBIgH7Uk5zUtSzFDEwAsik,250
|
|
84
|
+
replay/models/nn/sequential/sasrec/dataset.py,sha256=ReGNc6t9jjXxMZJp0WqFj1jatJFHnWOrkK3W8lwBNIs,7036
|
|
85
|
+
replay/models/nn/sequential/sasrec/lightning.py,sha256=DtLnNikTNvqroCzaVFw7u-QZpZdvwiYbCwJLE7FkHms,21397
|
|
86
|
+
replay/models/nn/sequential/sasrec/model.py,sha256=DE9kaqlcL22v07kpi2IzIwZ4-3AXNBVTZCnfuTS5usg,27775
|
|
87
|
+
replay/models/pop_rec.py,sha256=Ju9y2rU2vW_jFU9-W15fbbr5_ZzYGihSjSxsqKsAf0Q,4964
|
|
88
|
+
replay/models/query_pop_rec.py,sha256=UNsHtf3eQpJom73ZmEO5us4guI4SnCLJYTfuUpRgqes,4086
|
|
89
|
+
replay/models/random_rec.py,sha256=9SC012_X3sNzrAjDG1CPGhjisZb6gnv4VCW7yIMSNpk,8066
|
|
90
|
+
replay/models/slim.py,sha256=LHQl91PAsiCA8wLYzyapW1DVmqzGaCwtydBvZKC5yo0,4723
|
|
91
|
+
replay/models/thompson_sampling.py,sha256=gcjlVl1mPiEVt70y8frA762O-eCZzd3SVg1lnDRCEHk,1939
|
|
92
|
+
replay/models/ucb.py,sha256=X98ulD8L3gWR3VA7rbQkXFqQyzWc-Nt12lp_gbLTfLQ,6964
|
|
93
|
+
replay/models/wilson.py,sha256=o7aUWjq3648dAfgGBoWD5Gu-HzdyobPMaH2lzCLijiA,4558
|
|
94
|
+
replay/models/word2vec.py,sha256=MgoRIS5vqW9cH1HKAGa2xsLLnTH6XC1EXk4Dzvn5lXA,9171
|
|
95
|
+
replay/optimization/__init__.py,sha256=az6U10rF7X6rPRUUPwLyiM1WFNJ_6kl0imA5xLVWFLs,120
|
|
96
|
+
replay/optimization/optuna_objective.py,sha256=Z-8X0_FT3BicVWj0UhxoLrvZAck3Dhn7jHDGo0i0hxA,7653
|
|
97
|
+
replay/preprocessing/__init__.py,sha256=TtBysFqYeDy4kZAEnWEaNSwPvbffYdfMkEs71YG51fM,411
|
|
98
|
+
replay/preprocessing/converter.py,sha256=DczqsVLrwFi6EFhK2HR8rGiIxGCwXeY7QNgWorjA41g,4390
|
|
99
|
+
replay/preprocessing/filters.py,sha256=6MaO4IIyKNFP2AR94YA5iQUhQvuCRhAFfj0opI6o4-Q,33744
|
|
100
|
+
replay/preprocessing/history_based_fp.py,sha256=tfgKJPKm53LSNqM6VmMXYsVrRDc-rP1Tbzn8s3mbziQ,18751
|
|
101
|
+
replay/preprocessing/label_encoder.py,sha256=MLBavPD-dB644as0E9ZJSE9-8QxGCB_IHek1w3xtqDI,27040
|
|
102
|
+
replay/preprocessing/sessionizer.py,sha256=G6i0K3FwqtweRxvcSYraJ-tBWAT2HnV-bWHHlIZJF-s,12217
|
|
103
|
+
replay/scenarios/__init__.py,sha256=kw2wRkPPinw0IBA20D83XQ3xeSudk3KuYAAA1Wdr8xY,93
|
|
104
|
+
replay/scenarios/fallback.py,sha256=EeBmIR-5igzKR2m55bQRFyhxTkpJez6ZkCW449n8hWs,7130
|
|
105
|
+
replay/splitters/__init__.py,sha256=DnqVMelrzLwR8fGQgcWN_8FipGs8T4XGSPOMW-L_x2g,454
|
|
106
|
+
replay/splitters/base_splitter.py,sha256=qWW8Sueu0BrYt0WIxMbzooAC4-jhEmyd6pMND_H_qB0,7751
|
|
107
|
+
replay/splitters/cold_user_random_splitter.py,sha256=gVwBVdn_0IOaLGT_UzJoS9AMaPhelZy-FpC5JQS1PhA,4136
|
|
108
|
+
replay/splitters/k_folds.py,sha256=WH02_DP18A2ae893ysonmfLPB56_i1ETllTAwaCYekg,6218
|
|
109
|
+
replay/splitters/last_n_splitter.py,sha256=r9kdq2JPi508C9ywjwc68an-iq27KsigMfHWLz0YohE,15346
|
|
110
|
+
replay/splitters/new_users_splitter.py,sha256=bv_QCPkL7KFxJIovAXQbP3Rlty3My48YNTqrj-2ucFQ,9167
|
|
111
|
+
replay/splitters/random_splitter.py,sha256=mbOcxeF0B9WQ9OSxK8CHkPtO8UzKCZJt3rRyFhn-hyQ,2996
|
|
112
|
+
replay/splitters/ratio_splitter.py,sha256=8zvuCn16Icc4ntQPKXJ5ArAWuJzCZ9NHZtgWctKyBVY,17519
|
|
113
|
+
replay/splitters/time_splitter.py,sha256=iXhuafjBx7dWyJSy-TEVy1IUQBwMpA1gAiF4-GtRe2g,9031
|
|
114
|
+
replay/splitters/two_stage_splitter.py,sha256=PWozxjjgjrVzdz6Sm9dcDTeH0bOA24reFzkk_N_TgbQ,17734
|
|
115
|
+
replay/utils/__init__.py,sha256=vDJgOWq81fbBs-QO4ZDpdqR4KDyO1kMOOxBRi-5Gp7E,253
|
|
116
|
+
replay/utils/common.py,sha256=6JxR5bFuTFTFWad36J5Zu8dFgpFXoof6VsVpF2sD7h8,1471
|
|
117
|
+
replay/utils/dataframe_bucketizer.py,sha256=LipmBBQkdkLGroZpbP9i7qvTombLdMxo2dUUys1m5OY,3748
|
|
118
|
+
replay/utils/distributions.py,sha256=kGGq2KzQZ-yhTuw_vtOsKFXVpXUOQ2l4aIFBcaDufZ8,1202
|
|
119
|
+
replay/utils/model_handler.py,sha256=V-mHDh8_UexjVSsMBBRA9yrjS_5MPHwYOwv_UrI-Zfs,6466
|
|
120
|
+
replay/utils/session_handler.py,sha256=ijTvDSNAe1D9R1e-dhtd-r80tFNiIBsFdWZLgw-gLEo,5153
|
|
121
|
+
replay/utils/spark_utils.py,sha256=PhNi9fW28ek0ZB90AUg3tsT5BULbQjDhLalxxww9eLE,26700
|
|
122
|
+
replay/utils/time.py,sha256=J8asoQBytPcNw-BLGADYIsKeWhIoN1H5hKiX9t2AMqo,9376
|
|
123
|
+
replay/utils/types.py,sha256=5sw0A7NG4ZgQKdWORnBy0wBZ5F98sP_Ju8SKQ6zbDS4,651
|
|
124
|
+
replay_rec-0.17.0.dist-info/LICENSE,sha256=rPmcA7UrHxBChEAAlJyE24qUWKKl9yLQXxFsKeg_LX4,11344
|
|
125
|
+
replay_rec-0.17.0.dist-info/METADATA,sha256=5wmgvIeFw22ST7-kZrWT9ggEAmne8I7_yHkwN1Bfus8,10588
|
|
126
|
+
replay_rec-0.17.0.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
|
|
127
|
+
replay_rec-0.17.0.dist-info/RECORD,,
|
replay/experimental/__init__.py
DELETED
|
File without changes
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Most metrics require dataframe with recommendations
|
|
3
|
-
and dataframe with ground truth values —
|
|
4
|
-
which objects each user interacted with.
|
|
5
|
-
|
|
6
|
-
- recommendations (Union[pandas.DataFrame, spark.DataFrame]):
|
|
7
|
-
predictions of a recommender system,
|
|
8
|
-
DataFrame with columns ``[user_id, item_id, relevance]``
|
|
9
|
-
- ground_truth (Union[pandas.DataFrame, spark.DataFrame]):
|
|
10
|
-
test data, DataFrame with columns
|
|
11
|
-
``[user_id, item_id, timestamp, relevance]``
|
|
12
|
-
|
|
13
|
-
Metric is calculated for all users, presented in ``ground_truth``
|
|
14
|
-
for accurate metric calculation in case when the recommender system generated
|
|
15
|
-
recommendation not for all users. It is assumed, that all users,
|
|
16
|
-
we want to calculate metric for, have positive interactions.
|
|
17
|
-
|
|
18
|
-
But if we have users, who observed the recommendations, but have not responded,
|
|
19
|
-
those users will be ignored and metric will be overestimated.
|
|
20
|
-
For such case we propose additional optional parameter ``ground_truth_users``,
|
|
21
|
-
the dataframe with all users, which should be considered during the metric calculation.
|
|
22
|
-
|
|
23
|
-
- ground_truth_users (Optional[Union[pandas.DataFrame, spark.DataFrame]]):
|
|
24
|
-
full list of users to calculate metric for, DataFrame with ``user_id`` column
|
|
25
|
-
|
|
26
|
-
Every metric is calculated using top ``K`` items for each user.
|
|
27
|
-
It is also possible to calculate metrics
|
|
28
|
-
using multiple values for ``K`` simultaneously.
|
|
29
|
-
In this case the result will be a dictionary and not a number.
|
|
30
|
-
|
|
31
|
-
Make sure your recommendations do not contain user-item duplicates
|
|
32
|
-
as duplicates could lead to the wrong calculation results.
|
|
33
|
-
|
|
34
|
-
- k (Union[Iterable[int], int]):
|
|
35
|
-
a single number or a list, specifying the
|
|
36
|
-
truncation length for recommendation list for each user
|
|
37
|
-
|
|
38
|
-
By default, metrics are averaged by users,
|
|
39
|
-
but you can alternatively use method ``metric.median``.
|
|
40
|
-
Also, you can get the lower bound
|
|
41
|
-
of ``conf_interval`` for a given ``alpha``.
|
|
42
|
-
|
|
43
|
-
Diversity metrics require extra parameters on initialization stage,
|
|
44
|
-
but do not use ``ground_truth`` parameter.
|
|
45
|
-
|
|
46
|
-
For each metric, a formula for its calculation is given, because this is
|
|
47
|
-
important for the correct comparison of algorithms, as mentioned in our
|
|
48
|
-
`article <https://arxiv.org/abs/2206.12858>`_.
|
|
49
|
-
"""
|
|
50
|
-
from replay.experimental.metrics.base_metric import Metric, NCISMetric
|
|
51
|
-
from replay.experimental.metrics.coverage import Coverage
|
|
52
|
-
from replay.experimental.metrics.hitrate import HitRate
|
|
53
|
-
from replay.experimental.metrics.map import MAP
|
|
54
|
-
from replay.experimental.metrics.mrr import MRR
|
|
55
|
-
from replay.experimental.metrics.ncis_precision import NCISPrecision
|
|
56
|
-
from replay.experimental.metrics.ndcg import NDCG
|
|
57
|
-
from replay.experimental.metrics.precision import Precision
|
|
58
|
-
from replay.experimental.metrics.recall import Recall
|
|
59
|
-
from replay.experimental.metrics.rocauc import RocAuc
|
|
60
|
-
from replay.experimental.metrics.surprisal import Surprisal
|
|
61
|
-
from replay.experimental.metrics.unexpectedness import Unexpectedness
|