replay-rec 0.20.0__tar.gz → 0.20.0rc0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/PKG-INFO +17 -17
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/README.md +6 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/pyproject.toml +11 -23
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/__init__.py +1 -1
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/dataset.py +10 -9
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/dataset_utils/dataset_label_encoder.py +5 -4
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/schema.py +9 -18
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/sequence_tokenizer.py +16 -15
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/sequential_dataset.py +4 -4
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/torch_sequential_dataset.py +5 -4
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/utils.py +2 -1
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/schema.py +3 -12
- replay_rec-0.20.0rc0/replay/experimental/metrics/__init__.py +62 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/base_metric.py +603 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/coverage.py +97 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/experiment.py +175 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/hitrate.py +26 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/map.py +30 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/mrr.py +18 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/ncis_precision.py +31 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/ndcg.py +49 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/precision.py +22 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/recall.py +25 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/rocauc.py +49 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/surprisal.py +90 -0
- replay_rec-0.20.0rc0/replay/experimental/metrics/unexpectedness.py +76 -0
- replay_rec-0.20.0rc0/replay/experimental/models/__init__.py +50 -0
- replay_rec-0.20.0rc0/replay/experimental/models/admm_slim.py +257 -0
- replay_rec-0.20.0rc0/replay/experimental/models/base_neighbour_rec.py +200 -0
- replay_rec-0.20.0rc0/replay/experimental/models/base_rec.py +1386 -0
- replay_rec-0.20.0rc0/replay/experimental/models/base_torch_rec.py +234 -0
- replay_rec-0.20.0rc0/replay/experimental/models/cql.py +454 -0
- replay_rec-0.20.0rc0/replay/experimental/models/ddpg.py +932 -0
- replay_rec-0.20.0rc0/replay/experimental/models/dt4rec/dt4rec.py +189 -0
- replay_rec-0.20.0rc0/replay/experimental/models/dt4rec/gpt1.py +401 -0
- replay_rec-0.20.0rc0/replay/experimental/models/dt4rec/trainer.py +127 -0
- replay_rec-0.20.0rc0/replay/experimental/models/dt4rec/utils.py +264 -0
- replay_rec-0.20.0rc0/replay/experimental/models/extensions/spark_custom_models/als_extension.py +792 -0
- replay_rec-0.20.0rc0/replay/experimental/models/hierarchical_recommender.py +331 -0
- replay_rec-0.20.0rc0/replay/experimental/models/implicit_wrap.py +131 -0
- replay_rec-0.20.0rc0/replay/experimental/models/lightfm_wrap.py +303 -0
- replay_rec-0.20.0rc0/replay/experimental/models/mult_vae.py +332 -0
- replay_rec-0.20.0rc0/replay/experimental/models/neural_ts.py +986 -0
- replay_rec-0.20.0rc0/replay/experimental/models/neuromf.py +406 -0
- replay_rec-0.20.0rc0/replay/experimental/models/scala_als.py +293 -0
- replay_rec-0.20.0rc0/replay/experimental/models/u_lin_ucb.py +115 -0
- replay_rec-0.20.0rc0/replay/experimental/nn/data/__init__.py +1 -0
- replay_rec-0.20.0rc0/replay/experimental/nn/data/schema_builder.py +102 -0
- replay_rec-0.20.0rc0/replay/experimental/preprocessing/__init__.py +3 -0
- replay_rec-0.20.0rc0/replay/experimental/preprocessing/data_preparator.py +839 -0
- replay_rec-0.20.0rc0/replay/experimental/preprocessing/padder.py +229 -0
- replay_rec-0.20.0rc0/replay/experimental/preprocessing/sequence_generator.py +208 -0
- replay_rec-0.20.0rc0/replay/experimental/scenarios/__init__.py +1 -0
- replay_rec-0.20.0rc0/replay/experimental/scenarios/obp_wrapper/__init__.py +8 -0
- replay_rec-0.20.0rc0/replay/experimental/scenarios/obp_wrapper/obp_optuna_objective.py +74 -0
- replay_rec-0.20.0rc0/replay/experimental/scenarios/obp_wrapper/replay_offline.py +261 -0
- replay_rec-0.20.0rc0/replay/experimental/scenarios/obp_wrapper/utils.py +85 -0
- replay_rec-0.20.0rc0/replay/experimental/scenarios/two_stages/reranker.py +117 -0
- replay_rec-0.20.0rc0/replay/experimental/scenarios/two_stages/two_stages_scenario.py +757 -0
- replay_rec-0.20.0rc0/replay/experimental/utils/logger.py +24 -0
- replay_rec-0.20.0rc0/replay/experimental/utils/model_handler.py +186 -0
- replay_rec-0.20.0rc0/replay/experimental/utils/session_handler.py +44 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/base_metric.py +11 -10
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/categorical_diversity.py +8 -8
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/coverage.py +4 -4
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/experiment.py +3 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/hitrate.py +1 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/map.py +1 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/mrr.py +1 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/ndcg.py +1 -2
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/novelty.py +3 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/offline_metrics.py +16 -16
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/precision.py +1 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/recall.py +1 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/rocauc.py +1 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/surprisal.py +4 -4
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/torch_metrics_builder.py +13 -12
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/unexpectedness.py +2 -2
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/als.py +2 -2
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/association_rules.py +4 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/base_neighbour_rec.py +3 -2
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/base_rec.py +11 -10
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/cat_pop_rec.py +2 -1
- replay_rec-0.20.0rc0/replay/models/extensions/ann/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/ann_mixin.py +2 -1
- replay_rec-0.20.0rc0/replay/models/extensions/ann/entities/__init__.py +0 -0
- replay_rec-0.20.0rc0/replay/models/extensions/ann/index_builders/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_builders/executor_hnswlib_index_builder.py +2 -1
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_builders/executor_nmslib_index_builder.py +2 -1
- replay_rec-0.20.0rc0/replay/models/extensions/ann/index_inferers/__init__.py +0 -0
- replay_rec-0.20.0rc0/replay/models/extensions/ann/index_stores/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/lin_ucb.py +3 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/optimizer_utils/optimizer_factory.py +2 -2
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/bert4rec/dataset.py +2 -2
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/bert4rec/lightning.py +3 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/bert4rec/model.py +2 -2
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/callbacks/prediction_callbacks.py +12 -12
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/callbacks/validation_callback.py +9 -9
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/compiled/base_compiled_model.py +5 -5
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/postprocessors/_base.py +2 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/postprocessors/postprocessors.py +10 -10
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/sasrec/lightning.py +3 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/sasrec/model.py +8 -8
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/slim.py +2 -2
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/ucb.py +2 -2
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/word2vec.py +3 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/discretizer.py +8 -7
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/filters.py +4 -4
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/history_based_fp.py +6 -6
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/label_encoder.py +8 -7
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/scenarios/fallback.py +4 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/base_splitter.py +3 -3
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/cold_user_random_splitter.py +4 -4
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/k_folds.py +4 -4
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/last_n_splitter.py +10 -10
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/new_users_splitter.py +4 -4
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/random_splitter.py +4 -4
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/ratio_splitter.py +10 -10
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/time_splitter.py +6 -6
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/two_stage_splitter.py +4 -4
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/__init__.py +1 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/common.py +1 -1
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/session_handler.py +2 -2
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/spark_utils.py +6 -5
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/types.py +3 -1
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/LICENSE +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/NOTICE +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/dataset_utils/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/spark_schema.py +0 -0
- {replay_rec-0.20.0/replay/models/extensions → replay_rec-0.20.0rc0/replay/experimental}/__init__.py +0 -0
- {replay_rec-0.20.0/replay/models/extensions/ann → replay_rec-0.20.0rc0/replay/experimental/models/dt4rec}/__init__.py +0 -0
- {replay_rec-0.20.0/replay/models/extensions/ann/entities → replay_rec-0.20.0rc0/replay/experimental/models/extensions/spark_custom_models}/__init__.py +0 -0
- {replay_rec-0.20.0/replay/models/extensions/ann/index_builders → replay_rec-0.20.0rc0/replay/experimental/scenarios/two_stages}/__init__.py +0 -0
- {replay_rec-0.20.0/replay/models/extensions/ann/index_inferers → replay_rec-0.20.0rc0/replay/experimental/utils}/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/descriptors.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/cluster.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/common.py +0 -0
- {replay_rec-0.20.0/replay/models/extensions/ann/index_stores → replay_rec-0.20.0rc0/replay/models/extensions}/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/entities/base_hnsw_param.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/entities/hnswlib_param.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/entities/nmslib_hnsw_param.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_builders/base_index_builder.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_builders/driver_hnswlib_index_builder.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_builders/driver_nmslib_index_builder.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_builders/nmslib_index_builder_mixin.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_inferers/base_inferer.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_inferers/hnswlib_filter_index_inferer.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_inferers/hnswlib_index_inferer.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_inferers/utils.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_stores/base_index_store.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_stores/hdfs_index_store.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_stores/shared_disk_index_store.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_stores/spark_files_index_store.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_stores/utils.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/utils.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/kl_ucb.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/knn.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/loss/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/loss/sce.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/optimizer_utils/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/bert4rec/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/callbacks/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/compiled/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/compiled/bert4rec_compiled.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/compiled/sasrec_compiled.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/postprocessors/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/sasrec/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/sasrec/dataset.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/optimization/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/optimization/optuna_mixin.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/optimization/optuna_objective.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/pop_rec.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/query_pop_rec.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/random_rec.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/thompson_sampling.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/wilson.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/converter.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/sessionizer.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/scenarios/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/__init__.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/dataframe_bucketizer.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/distributions.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/model_handler.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/time.py +0 -0
- {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/warnings.py +0 -0
{replay_rec-0.20.0 → replay_rec-0.20.0rc0}/PKG-INFO RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: replay-rec
-Version: 0.20.0
+Version: 0.20.0rc0
 Summary: RecSys Library
 License-Expression: Apache-2.0
 License-File: LICENSE
@@ -14,29 +14,23 @@ Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
 Classifier: Natural Language :: English
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-
-
-
-Requires-Dist: lightning (
-Requires-Dist:
-Requires-Dist: lightning ; extra == "torch-cpu"
+Requires-Dist: d3rlpy (>=2.8.1,<2.9)
+Requires-Dist: implicit (>=0.7.2,<0.8)
+Requires-Dist: lightautoml (>=0.4.1,<0.5)
+Requires-Dist: lightning (>=2.0.2,<=2.4.0)
+Requires-Dist: numba (>=0.50,<1)
 Requires-Dist: numpy (>=1.20.0,<2)
 Requires-Dist: pandas (>=1.3.5,<2.4.0)
 Requires-Dist: polars (<2.0)
-Requires-Dist: psutil (<=7.0.0)
-Requires-Dist: psutil ; extra == "spark"
+Requires-Dist: psutil (<=7.0.0)
 Requires-Dist: pyarrow (<22.0)
-Requires-Dist: pyspark (>=3.0,<3.5)
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: pytorch-optimizer ; extra == "torch"
-Requires-Dist: pytorch-optimizer ; extra == "torch-cpu"
+Requires-Dist: pyspark (>=3.0,<3.5)
+Requires-Dist: pytorch-optimizer (>=3.8.0,<4)
+Requires-Dist: sb-obp (>=0.5.10,<0.6)
 Requires-Dist: scikit-learn (>=1.6.1,<1.7.0)
 Requires-Dist: scipy (>=1.13.1,<1.14)
 Requires-Dist: setuptools
-Requires-Dist: torch (>=1.8,<3.0.0)
-Requires-Dist: torch ; extra == "torch"
-Requires-Dist: torch ; extra == "torch-cpu"
+Requires-Dist: torch (>=1.8,<3.0.0)
 Requires-Dist: tqdm (>=4.67,<5)
 Project-URL: Homepage, https://sb-ai-lab.github.io/RePlay/
 Project-URL: Repository, https://github.com/sb-ai-lab/RePlay
@@ -245,6 +239,12 @@ pip install openvino onnx
 pip install hnswlib fixed-install-nmslib
 ```
 
+4) (Experimental) LightFM model support:
+```bash
+pip install lightfm
+```
+> **_NOTE_** : LightFM is not officially supported for Python 3.12 due to discontinued maintenance of the library. If you wish to install it locally, you'll have to use a patched fork of LightFM, such as the [one used internally](https://github.com/daviddavo/lightfm).
+
 
 <a name="examples"></a>
 ## 📑 Resources
{replay_rec-0.20.0 → replay_rec-0.20.0rc0}/README.md RENAMED

@@ -201,6 +201,12 @@ pip install openvino onnx
 pip install hnswlib fixed-install-nmslib
 ```
 
+4) (Experimental) LightFM model support:
+```bash
+pip install lightfm
+```
+> **_NOTE_** : LightFM is not officially supported for Python 3.12 due to discontinued maintenance of the library. If you wish to install it locally, you'll have to use a patched fork of LightFM, such as the [one used internally](https://github.com/daviddavo/lightfm).
+
 
 <a name="examples"></a>
 ## 📑 Resources
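The note above points Python 3.12 users at a patched fork rather than a PyPI release. One way to install straight from that fork is pip's standard VCS syntax; a sketch (the fork URL comes from the note itself, but the absence of a pinned tag or commit is an assumption you should tighten for reproducibility):

```bash
# Hypothetical: install LightFM from the patched fork referenced in the note.
# Pin a specific commit or tag in real use; none is given here.
pip install "lightfm @ git+https://github.com/daviddavo/lightfm"
```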
{replay_rec-0.20.0 → replay_rec-0.20.0rc0}/pyproject.toml RENAMED

@@ -40,19 +40,19 @@ dependencies = [
     "scikit-learn (>=1.6.1,<1.7.0)",
     "pyarrow (<22.0)",
     "tqdm (>=4.67,<5)",
-    "
-    "
-    "
-    "
-    "
+    "torch (>=1.8,<3.0.0)",
+    "lightning (>=2.0.2,<=2.4.0)",
+    "pytorch-optimizer (>=3.8.0,<4)",
+    "lightautoml (>=0.4.1,<0.5)",
+    "numba (>=0.50,<1)",
+    "sb-obp (>=0.5.10,<0.6)",
+    "d3rlpy (>=2.8.1,<2.9)",
+    "implicit (>=0.7.2,<0.8)",
+    "pyspark (>=3.0,<3.5)",
+    "psutil (<=7.0.0)",
 ]
 dynamic = ["dependencies"]
-version = "0.20.0"
-
-[project.optional-dependencies]
-spark = ["pyspark", "psutil"]
-torch = ["torch", "pytorch-optimizer", "lightning"]
-torch-cpu = ["torch", "pytorch-optimizer", "lightning"]
+version = "0.20.0.preview"
 
 [project.urls]
 homepage = "https://sb-ai-lab.github.io/RePlay/"
@@ -66,13 +66,6 @@ target-version = ["py39", "py310", "py311", "py312"]
 packages = [{include = "replay"}]
 exclude = [
     "replay/conftest.py",
-    "replay/experimental",
-]
-
-[tool.poetry.dependencies]
-torch = [
-    {markers = "extra == 'torch-cpu' and extra !='torch'", source = "torch-cpu-mirror"},
-    {markers = "extra == 'torch' and extra !='torch-cpu'", source = "PyPI"},
 ]
 
 [tool.poetry.group.dev.dependencies]
@@ -95,11 +88,6 @@ docutils = "0.16"
 data-science-types = "0.2.23"
 filelock = "~3.14.0"
 
-[[tool.poetry.source]]
-name = "torch-cpu-mirror"
-url = "https://download.pytorch.org/whl/cpu"
-priority = "explicit"
-
 [tool.poetry-dynamic-versioning]
 enable = false
 format-jinja = """0.20.0{{ env['PACKAGE_SUFFIX'] }}"""
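Read together with the PKG-INFO hunk above, this shows the `spark`, `torch`, and `torch-cpu` extras of 0.20.0 being folded into the mandatory dependency list of 0.20.0rc0, with the dedicated torch-cpu wheel mirror dropped as well. A hypothetical install session illustrating the user-facing difference (standard pip extras syntax; resolution behavior not verified here):

```bash
# 0.20.0: the Spark and Torch stacks are opt-in extras
pip install "replay-rec[spark,torch]==0.20.0"

# 0.20.0rc0: pyspark, torch, lightning, lightautoml, etc. are core
# dependencies, so a plain install pulls in the full stack
pip install "replay-rec==0.20.0rc0"
```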
{replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/dataset.py RENAMED

@@ -5,8 +5,9 @@
 from __future__ import annotations
 
 import json
+from collections.abc import Iterable, Sequence
 from pathlib import Path
-from typing import Callable,
+from typing import Callable, Optional, Union
 
 import numpy as np
 from pandas import read_parquet as pd_read_parquet
@@ -315,7 +316,7 @@ class Dataset:
         :returns: Loaded Dataset.
         """
         base_path = Path(path).with_suffix(".replay").resolve()
-        with open(base_path / "init_args.json"
+        with open(base_path / "init_args.json") as file:
             dataset_dict = json.loads(file.read())
 
         if dataframe_type not in ["pandas", "spark", "polars", None]:
@@ -436,14 +437,14 @@ class Dataset:
         )
 
     def _get_feature_source_map(self):
-        self._feature_source_map:
+        self._feature_source_map: dict[FeatureSource, DataFrameLike] = {
             FeatureSource.INTERACTIONS: self.interactions,
             FeatureSource.QUERY_FEATURES: self.query_features,
             FeatureSource.ITEM_FEATURES: self.item_features,
         }
 
     def _get_ids_source_map(self):
-        self._ids_feature_map:
+        self._ids_feature_map: dict[FeatureHint, DataFrameLike] = {
             FeatureHint.QUERY_ID: self.query_features if self.query_features is not None else self.interactions,
             FeatureHint.ITEM_ID: self.item_features if self.item_features is not None else self.interactions,
         }
@@ -499,10 +500,10 @@ class Dataset:
         )
         return FeatureSchema(features_list=features_list + filled_features)
 
-    def _fill_unlabeled_features_sources(self, feature_schema: FeatureSchema) ->
+    def _fill_unlabeled_features_sources(self, feature_schema: FeatureSchema) -> list[FeatureInfo]:
         features_list = list(feature_schema.all_features)
 
-        source_mapping:
+        source_mapping: dict[str, FeatureSource] = {}
         for source in FeatureSource:
             dataframe = self._feature_source_map[source]
             if dataframe is not None:
@@ -524,7 +525,7 @@ class Dataset:
         self._set_cardinality(features_list=features_list)
         return features_list
 
-    def _get_unlabeled_columns(self, source: FeatureSource, feature_schema: FeatureSchema) ->
+    def _get_unlabeled_columns(self, source: FeatureSource, feature_schema: FeatureSchema) -> list[FeatureInfo]:
         set_source_dataframe_columns = set(self._feature_source_map[source].columns)
         set_labeled_dataframe_columns = set(feature_schema.columns)
         unlabeled_columns = set_source_dataframe_columns - set_labeled_dataframe_columns
@@ -534,13 +535,13 @@ class Dataset:
         ]
         return unlabeled_features_list
 
-    def _fill_unlabeled_features(self, source: FeatureSource, feature_schema: FeatureSchema) ->
+    def _fill_unlabeled_features(self, source: FeatureSource, feature_schema: FeatureSchema) -> list[FeatureInfo]:
         unlabeled_columns = self._get_unlabeled_columns(source=source, feature_schema=feature_schema)
         self._set_features_source(feature_list=unlabeled_columns, source=source)
         self._set_cardinality(features_list=unlabeled_columns)
         return unlabeled_columns
 
-    def _set_features_source(self, feature_list:
+    def _set_features_source(self, feature_list: list[FeatureInfo], source: FeatureSource) -> None:
         for feature in feature_list:
             feature._set_feature_source(source)
 
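The dominant pattern in this file and the ones that follow is PEP 585 cleanup: `Dict`, `List`, `Set`, and the container ABCs leave `typing` in favor of builtin generics and `collections.abc`, which requires Python 3.9+ and matches the `py39`-`py312` targets in pyproject.toml. A minimal before/after sketch of the pattern, with hypothetical names rather than code from the package:

```python
# Before: container aliases imported from typing (deprecated since Python 3.9)
from typing import Dict, List, Optional

def first_item_old(features: Dict[str, List[int]]) -> Optional[int]:
    values = features.get("item_id", [])
    return values[0] if values else None

# After: builtin generics (dict[...], list[...]) for concrete containers,
# collections.abc for abstract ones (Mapping, Sequence, Iterable, ...)
from collections.abc import Mapping, Sequence
from typing import Optional

def first_item_new(features: Mapping[str, Sequence[int]]) -> Optional[int]:
    values = features.get("item_id", ())
    return values[0] if values else None
```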
{replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/dataset_utils/dataset_label_encoder.py RENAMED

@@ -6,7 +6,8 @@ Contains classes for encoding categorical data
 """
 
 import warnings
-from
+from collections.abc import Iterable, Iterator, Sequence
+from typing import Optional, Union
 
 from replay.data import Dataset, FeatureHint, FeatureSchema, FeatureSource, FeatureType
 from replay.preprocessing import LabelEncoder, LabelEncodingRule, SequenceEncodingRule
@@ -45,9 +46,9 @@ class DatasetLabelEncoder:
         """
         self._handle_unknown_rule = handle_unknown_rule
         self._default_value_rule = default_value_rule
-        self._encoding_rules:
+        self._encoding_rules: dict[str, LabelEncodingRule] = {}
 
-        self._features_columns:
+        self._features_columns: dict[Union[FeatureHint, FeatureSource], Sequence[str]] = {}
 
     def fit(self, dataset: Dataset) -> "DatasetLabelEncoder":
         """
@@ -161,7 +162,7 @@ class DatasetLabelEncoder:
         """
         self._check_if_initialized()
 
-        columns_set:
+        columns_set: set[str]
         columns_set = {columns} if isinstance(columns, str) else {*columns}
 
         def get_encoding_rules() -> Iterator[LabelEncodingRule]:
{replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/schema.py RENAMED

@@ -1,17 +1,8 @@
+from collections import OrderedDict
+from collections.abc import ItemsView, Iterable, Iterator, KeysView, Mapping, Sequence, ValuesView
 from typing import (
-    Dict,
-    ItemsView,
-    Iterable,
-    Iterator,
-    KeysView,
-    List,
-    Mapping,
     Optional,
-    OrderedDict,
-    Sequence,
-    Set,
     Union,
-    ValuesView,
 )
 
 import torch
@@ -20,7 +11,7 @@ from replay.data import FeatureHint, FeatureSource, FeatureType
 
 # Alias
 TensorMap = Mapping[str, torch.Tensor]
-MutableTensorMap =
+MutableTensorMap = dict[str, torch.Tensor]
 
 
 class TensorFeatureSource:
@@ -79,7 +70,7 @@ class TensorFeatureInfo:
         feature_type: FeatureType,
         is_seq: bool = False,
         feature_hint: Optional[FeatureHint] = None,
-        feature_sources: Optional[
+        feature_sources: Optional[list[TensorFeatureSource]] = None,
         cardinality: Optional[int] = None,
         padding_value: int = 0,
         embedding_dim: Optional[int] = None,
@@ -154,13 +145,13 @@ class TensorFeatureInfo:
         self._feature_hint = hint
 
     @property
-    def feature_sources(self) -> Optional[
+    def feature_sources(self) -> Optional[list[TensorFeatureSource]]:
        """
        :returns: List of sources feature came from.
        """
        return self._feature_sources
 
-    def _set_feature_sources(self, sources:
+    def _set_feature_sources(self, sources: list[TensorFeatureSource]) -> None:
         self._feature_sources = sources
 
     @property
@@ -276,7 +267,7 @@ class TensorSchema(Mapping[str, TensorFeatureInfo]):
 
         :returns: New tensor schema of given features.
         """
-        features:
+        features: set[TensorFeatureInfo] = set()
         for feature_name in features_to_keep:
             features.add(self._tensor_schema[feature_name])
         return TensorSchema(list(features))
@@ -432,7 +423,7 @@ class TensorSchema(Mapping[str, TensorFeatureInfo]):
             return None
         return rating_features.item().name
 
-    def _get_object_args(self) ->
+    def _get_object_args(self) -> dict:
         """
         Returns list of features represented as dictionaries.
         """
@@ -456,7 +447,7 @@ class TensorSchema(Mapping[str, TensorFeatureInfo]):
         return features
 
     @classmethod
-    def _create_object_by_args(cls, args:
+    def _create_object_by_args(cls, args: dict) -> "TensorSchema":
         features_list = []
         for feature_data in args:
             feature_data["feature_sources"] = (
{replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/sequence_tokenizer.py RENAMED

@@ -2,8 +2,9 @@ import abc
 import json
 import pickle
 import warnings
+from collections.abc import Sequence
 from pathlib import Path
-from typing import TYPE_CHECKING,
+from typing import TYPE_CHECKING, Generic, Optional, TypeVar, Union
 
 import numpy as np
 import polars as pl
@@ -187,7 +188,7 @@ class SequenceTokenizer:
     def _group_dataset(
         self,
         dataset: Dataset,
-    ) ->
+    ) -> tuple[SequenceDataFrameLike, Optional[SequenceDataFrameLike], Optional[SequenceDataFrameLike]]:
         from replay.data.nn.utils import ensure_pandas, groupby_sequences
 
         grouped_interactions = groupby_sequences(
@@ -268,13 +269,13 @@ class SequenceTokenizer:
         tensor_schema: "TensorSchema",
         query_id_column: str,
         item_id_column: str,
-    ) ->
+    ) -> set[str]:
         # We need only features, which related to tensor schema, otherwise feature should
         # be ignored for efficiency reasons. The code below does feature filtering, and
         # keeps features used as a source in tensor schema.
 
         # Query and item IDs are always needed
-        features_subset:
+        features_subset: list[str] = [
             query_id_column,
             item_id_column,
         ]
@@ -303,7 +304,7 @@ class SequenceTokenizer:
             msg = "All tensor features must have sources defined"
             raise ValueError(msg)
 
-        source_tables:
+        source_tables: list[FeatureSource] = [s.source for s in feature_sources]
 
         unexpected_tables = list(filter(lambda x: not isinstance(x, FeatureSource), source_tables))
         if len(unexpected_tables) > 0:
@@ -327,7 +328,7 @@ class SequenceTokenizer:
         tensor_features_to_keep: Optional[Sequence[str]] = None,
     ) -> None:
         # Check if all source columns specified in tensor schema exist in provided data frames
-        sources_for_tensors:
+        sources_for_tensors: list["TensorFeatureSource"] = []
         for tensor_feature_name, tensor_feature in tensor_schema.items():
             if tensor_features_to_keep is not None and tensor_feature_name not in tensor_features_to_keep:
                 continue
@@ -421,7 +422,7 @@ class SequenceTokenizer:
 
         if not use_pickle:
             base_path = Path(path).with_suffix(".replay").resolve()
-            with open(base_path / "init_args.json"
+            with open(base_path / "init_args.json") as file:
                 tokenizer_dict = json.loads(file.read())
 
         # load tensor_schema, tensor_features
@@ -625,7 +626,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
         """
         :returns: processed Pandas DataFrame with all features from tensor schema.
         """
-        all_features:
+        all_features: dict[str, Union[np.ndarray, list[np.ndarray]]] = {}
         all_features[self._query_id_column] = self._grouped_interactions[self._query_id_column].values
 
         for tensor_feature_name in self._tensor_schema:
@@ -635,7 +636,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
 
     def _process_num_interaction_feature(
         self, tensor_feature: "TensorFeatureInfo"
-    ) -> Union[
+    ) -> Union[list[np.ndarray], list[list]]:
         """
         Process numerical interaction feature.
 
@@ -656,7 +657,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
             values.append(np.array(sequence))
         return values
 
-    def _process_num_item_feature(self, tensor_feature: "TensorFeatureInfo") -> Union[
+    def _process_num_item_feature(self, tensor_feature: "TensorFeatureInfo") -> Union[list[np.ndarray], list[list]]:
         """
         Process numerical feature from item features dataset.
 
@@ -682,7 +683,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
 
         return values
 
-    def _process_num_query_feature(self, tensor_feature: "TensorFeatureInfo") ->
+    def _process_num_query_feature(self, tensor_feature: "TensorFeatureInfo") -> list[np.ndarray]:
         """
         Process numerical feature from query features dataset.
 
@@ -694,7 +695,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
 
     def _process_cat_interaction_feature(
         self, tensor_feature: "TensorFeatureInfo"
-    ) -> Union[
+    ) -> Union[list[np.ndarray], list[list]]:
         """
         Process categorical interaction feature.
 
@@ -715,7 +716,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
             values.append(np.array(sequence))
         return values
 
-    def _process_cat_query_feature(self, tensor_feature: "TensorFeatureInfo") ->
+    def _process_cat_query_feature(self, tensor_feature: "TensorFeatureInfo") -> list[np.ndarray]:
         """
         Process categorical feature from query features dataset.
 
@@ -744,7 +745,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
         ]
         return [np.array([query_feature[i]]).reshape(-1) for i in range(len(self._grouped_interactions))]
 
-    def _process_cat_item_feature(self, tensor_feature: "TensorFeatureInfo") -> Union[
+    def _process_cat_item_feature(self, tensor_feature: "TensorFeatureInfo") -> Union[list[np.ndarray], list[list]]:
         """
         Process categorical feature from item features dataset.
 
@@ -760,7 +761,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
         assert source is not None
 
         item_feature = self._item_features[source.column]
-        values:
+        values: list[np.ndarray] = []
 
         for item_id_sequence in self._grouped_interactions[self._item_id_column]:
             feature_sequence = item_feature.loc[item_id_sequence].values
{replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/sequential_dataset.py RENAMED

@@ -1,7 +1,7 @@
 import abc
 import json
 from pathlib import Path
-from typing import TYPE_CHECKING,
+from typing import TYPE_CHECKING, Union
 
 import numpy as np
 import pandas as pd
@@ -90,7 +90,7 @@ class SequentialDataset(abc.ABC):
     @staticmethod
     def keep_common_query_ids(
         lhs: "SequentialDataset", rhs: "SequentialDataset"
-    ) ->
+    ) -> tuple["SequentialDataset", "SequentialDataset"]:
         """
         Returns `SequentialDatasets` that contain query ids from both datasets.
 
@@ -203,7 +203,7 @@ class PandasSequentialDataset(SequentialDataset):
         from replay.data.nn import TensorSchema
 
         base_path = Path(path).with_suffix(".replay").resolve()
-        with open(base_path / "init_args.json"
+        with open(base_path / "init_args.json") as file:
             sequential_dict = json.loads(file.read())
 
         sequences = pd.read_json(base_path / sequential_dict["init_args"]["sequences_path"])
@@ -287,7 +287,7 @@ class PolarsSequentialDataset(PandasSequentialDataset):
         from replay.data.nn import TensorSchema
 
         base_path = Path(path).with_suffix(".replay").resolve()
-        with open(base_path / "init_args.json"
+        with open(base_path / "init_args.json") as file:
             sequential_dict = json.loads(file.read())
 
         sequences = pl.DataFrame(pd.read_json(base_path / sequential_dict["init_args"]["sequences_path"]))
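The `load` hunks in dataset.py, sequence_tokenizer.py, and this file all touch the same persistence convention: a `<name>.replay` directory holding an `init_args.json` descriptor next to the serialized data (the tail of each removed `open(...)` call is truncated in this rendering, so only the new form is fully visible). A minimal sketch of reading such a bundle, assuming nothing beyond the layout shown in the hunks:

```python
import json
from pathlib import Path

def read_replay_bundle(path: str) -> dict:
    # Resolve "<path>.replay", mirroring the load methods in the hunks
    base_path = Path(path).with_suffix(".replay").resolve()
    # init_args.json holds the constructor arguments of the saved object
    with open(base_path / "init_args.json") as file:
        return json.loads(file.read())
```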
{replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/torch_sequential_dataset.py RENAMED

@@ -1,4 +1,5 @@
-from
+from collections.abc import Generator, Sequence
+from typing import TYPE_CHECKING, NamedTuple, Optional, Union, cast
 
 import numpy as np
 import torch
@@ -110,7 +111,7 @@ class TorchSequentialDataset(TorchDataset):
             return sequence
 
         # form shape for padded_sequence. Now supported one and two-dimentions features
-        padded_sequence_shape: Union[
+        padded_sequence_shape: Union[tuple[int, int], tuple[int]]
         if len(sequence.shape) == 1:
             padded_sequence_shape = (self._max_sequence_length,)
         elif len(sequence.shape) == 2:
@@ -134,10 +135,10 @@ class TorchSequentialDataset(TorchDataset):
             return torch.float32
         assert False, "Unknown tensor feature type"
 
-    def _build_index2sequence_map(self) -> Sequence[
+    def _build_index2sequence_map(self) -> Sequence[tuple[int, int]]:
         return list(self._iter_with_window())
 
-    def _iter_with_window(self) -> Generator[
+    def _iter_with_window(self) -> Generator[tuple[int, int], None, None]:
         for i in range(len(self._sequential)):
             actual_seq_len = self._sequential.get_sequence_length(i)
             left_seq_len = actual_seq_len - self._max_sequence_length
{replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/schema.py RENAMED

@@ -1,18 +1,9 @@
+from collections.abc import ItemsView, Iterable, Iterator, KeysView, Mapping, Sequence, ValuesView
 from enum import Enum
 from typing import (
     Callable,
-    Dict,
-    ItemsView,
-    Iterable,
-    Iterator,
-    KeysView,
-    List,
-    Mapping,
     Optional,
-    Sequence,
-    Set,
     Union,
-    ValuesView,
 )
 
 
@@ -162,7 +153,7 @@ class FeatureSchema(Mapping[str, FeatureInfo]):
         in original schema to keep in subset.
         :returns: new feature schema of given features.
         """
-        features:
+        features: set[FeatureInfo] = set()
         for feature_column in features_to_keep:
             if feature_column in self._features_schema:
                 features.add(self._features_schema[feature_column])
@@ -438,7 +429,7 @@ class FeatureSchema(Mapping[str, FeatureInfo]):
         """
         unique_columns = set()
         duplicates = set()
-        item_query_names:
+        item_query_names: dict[FeatureHint, list[str]] = {
             FeatureHint.ITEM_ID: [],
             FeatureHint.QUERY_ID: [],
         }
replay_rec-0.20.0rc0/replay/experimental/metrics/__init__.py ADDED

@@ -0,0 +1,62 @@
+"""
+Most metrics require a dataframe with recommendations
+and a dataframe with ground truth values,
+i.e. which objects each user interacted with.
+
+- recommendations (Union[pandas.DataFrame, spark.DataFrame]):
+    predictions of a recommender system,
+    DataFrame with columns ``[user_id, item_id, relevance]``
+- ground_truth (Union[pandas.DataFrame, spark.DataFrame]):
+    test data, DataFrame with columns
+    ``[user_id, item_id, timestamp, relevance]``
+
+The metric is calculated for all users present in ``ground_truth``,
+which keeps the metric accurate when the recommender system
+did not generate recommendations for every user. It is assumed that all users
+we want to calculate the metric for have positive interactions.
+
+But if there are users who observed the recommendations yet did not respond,
+those users will be ignored and the metric will be overestimated.
+For such cases we provide the additional optional parameter ``ground_truth_users``,
+a dataframe with all users that should be considered during the metric calculation.
+
+- ground_truth_users (Optional[Union[pandas.DataFrame, spark.DataFrame]]):
+    full list of users to calculate the metric for, DataFrame with a ``user_id`` column
+
+Every metric is calculated using the top ``K`` items for each user.
+It is also possible to calculate metrics
+using multiple values of ``K`` simultaneously;
+in that case the result will be a dictionary rather than a number.
+
+Make sure your recommendations do not contain user-item duplicates,
+as duplicates can lead to incorrect calculation results.
+
+- k (Union[Iterable[int], int]):
+    a single number or a list specifying the
+    truncation length of the recommendation list for each user
+
+By default, metrics are averaged over users,
+but you can alternatively use the method ``metric.median``.
+You can also get the lower bound
+of the ``conf_interval`` for a given ``alpha``.
+
+Diversity metrics require extra parameters at the initialization stage
+but do not use the ``ground_truth`` parameter.
+
+For each metric, the formula for its calculation is given, because this is
+important for the correct comparison of algorithms, as discussed in our
+`article <https://arxiv.org/abs/2206.12858>`_.
+"""
+
+from replay.experimental.metrics.base_metric import Metric, NCISMetric
+from replay.experimental.metrics.coverage import Coverage
+from replay.experimental.metrics.hitrate import HitRate
+from replay.experimental.metrics.map import MAP
+from replay.experimental.metrics.mrr import MRR
+from replay.experimental.metrics.ncis_precision import NCISPrecision
+from replay.experimental.metrics.ndcg import NDCG
+from replay.experimental.metrics.precision import Precision
+from replay.experimental.metrics.recall import Recall
+from replay.experimental.metrics.rocauc import RocAuc
+from replay.experimental.metrics.surprisal import Surprisal
+from replay.experimental.metrics.unexpectedness import Unexpectedness
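The module docstring above describes the inputs but shows no call; here is a hypothetical usage sketch consistent with it (the `Metric()(recommendations, ground_truth, k)` call shape and the toy data are assumptions, not taken from the package's documentation):

```python
import pandas as pd

from replay.experimental.metrics import NDCG, HitRate

recommendations = pd.DataFrame({
    "user_id":   [1, 1, 2, 2],
    "item_id":   [10, 11, 10, 12],
    "relevance": [0.9, 0.5, 0.8, 0.3],
})
ground_truth = pd.DataFrame({
    "user_id":   [1, 2],
    "item_id":   [11, 12],
    "timestamp": [0, 0],
    "relevance": [1.0, 1.0],
})

# Per the docstring: a single k yields a number,
# an iterable of ks yields a dict keyed by k.
hit_rate_at_2 = HitRate()(recommendations, ground_truth, k=2)
ndcg_at_1_2 = NDCG()(recommendations, ground_truth, k=[1, 2])
```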