replay-rec 0.18.0rc0__tar.gz → 0.18.1rc0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/PKG-INFO +73 -60
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/README.md +66 -56
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/pyproject.toml +7 -3
- replay_rec-0.18.1rc0/replay/__init__.py +3 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/dataset.py +27 -1
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/dataset_utils/dataset_label_encoder.py +6 -3
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/nn/schema.py +37 -16
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/nn/sequence_tokenizer.py +313 -165
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/nn/torch_sequential_dataset.py +17 -8
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/nn/utils.py +14 -7
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/schema.py +10 -6
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/__init__.py +3 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/base_rec.py +69 -0
- replay_rec-0.18.1rc0/replay/experimental/models/hierarchical_recommender.py +331 -0
- replay_rec-0.18.1rc0/replay/experimental/models/neural_ts.py +986 -0
- replay_rec-0.18.1rc0/replay/experimental/models/u_lin_ucb.py +115 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/nn/data/schema_builder.py +47 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/scenarios/obp_wrapper/replay_offline.py +40 -27
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/offline_metrics.py +2 -2
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/__init__.py +1 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/base_rec.py +18 -21
- replay_rec-0.18.1rc0/replay/models/lin_ucb.py +407 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/bert4rec/dataset.py +17 -4
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/bert4rec/lightning.py +121 -54
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/bert4rec/model.py +21 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/callbacks/prediction_callbacks.py +5 -1
- replay_rec-0.18.1rc0/replay/models/nn/sequential/compiled/__init__.py +5 -0
- replay_rec-0.18.1rc0/replay/models/nn/sequential/compiled/base_compiled_model.py +261 -0
- replay_rec-0.18.1rc0/replay/models/nn/sequential/compiled/bert4rec_compiled.py +152 -0
- replay_rec-0.18.1rc0/replay/models/nn/sequential/compiled/sasrec_compiled.py +145 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/postprocessors/postprocessors.py +27 -1
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/sasrec/dataset.py +17 -1
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/sasrec/lightning.py +126 -50
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/sasrec/model.py +3 -4
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/preprocessing/__init__.py +7 -1
- replay_rec-0.18.1rc0/replay/preprocessing/discretizer.py +719 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/preprocessing/label_encoder.py +384 -52
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/splitters/cold_user_random_splitter.py +1 -1
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/utils/__init__.py +1 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/utils/common.py +7 -8
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/utils/session_handler.py +3 -4
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/utils/spark_utils.py +15 -1
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/utils/types.py +8 -0
- replay_rec-0.18.0rc0/replay/__init__.py +0 -3
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/LICENSE +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/NOTICE +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/dataset_utils/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/nn/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/nn/sequential_dataset.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/spark_schema.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/base_metric.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/coverage.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/experiment.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/hitrate.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/map.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/mrr.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/ncis_precision.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/ndcg.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/precision.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/recall.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/rocauc.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/surprisal.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/metrics/unexpectedness.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/admm_slim.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/base_neighbour_rec.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/base_torch_rec.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/cql.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/ddpg.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/dt4rec/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/dt4rec/dt4rec.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/dt4rec/gpt1.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/dt4rec/trainer.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/dt4rec/utils.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/extensions/spark_custom_models/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/extensions/spark_custom_models/als_extension.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/implicit_wrap.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/lightfm_wrap.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/mult_vae.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/neuromf.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/models/scala_als.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/nn/data/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/preprocessing/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/preprocessing/data_preparator.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/preprocessing/padder.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/preprocessing/sequence_generator.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/scenarios/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/scenarios/obp_wrapper/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/scenarios/obp_wrapper/obp_optuna_objective.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/scenarios/obp_wrapper/utils.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/scenarios/two_stages/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/scenarios/two_stages/reranker.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/scenarios/two_stages/two_stages_scenario.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/utils/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/utils/logger.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/utils/model_handler.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/experimental/utils/session_handler.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/base_metric.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/categorical_diversity.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/coverage.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/descriptors.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/experiment.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/hitrate.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/map.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/mrr.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/ndcg.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/novelty.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/precision.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/recall.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/rocauc.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/surprisal.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/torch_metrics_builder.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/metrics/unexpectedness.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/als.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/association_rules.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/base_neighbour_rec.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/cat_pop_rec.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/cluster.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/ann_mixin.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/entities/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/entities/base_hnsw_param.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/entities/hnswlib_param.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/entities/nmslib_hnsw_param.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_builders/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_builders/base_index_builder.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_builders/driver_hnswlib_index_builder.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_builders/driver_nmslib_index_builder.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_builders/executor_hnswlib_index_builder.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_builders/executor_nmslib_index_builder.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_builders/nmslib_index_builder_mixin.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_inferers/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_inferers/base_inferer.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_inferers/hnswlib_filter_index_inferer.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_inferers/hnswlib_index_inferer.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_inferers/utils.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_stores/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_stores/base_index_store.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_stores/hdfs_index_store.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_stores/shared_disk_index_store.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_stores/spark_files_index_store.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/index_stores/utils.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/extensions/ann/utils.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/kl_ucb.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/knn.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/optimizer_utils/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/optimizer_utils/optimizer_factory.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/bert4rec/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/callbacks/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/callbacks/validation_callback.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/postprocessors/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/postprocessors/_base.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/nn/sequential/sasrec/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/pop_rec.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/query_pop_rec.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/random_rec.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/slim.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/thompson_sampling.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/ucb.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/wilson.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/models/word2vec.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/optimization/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/optimization/optuna_objective.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/preprocessing/converter.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/preprocessing/filters.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/preprocessing/history_based_fp.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/preprocessing/sessionizer.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/scenarios/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/scenarios/fallback.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/splitters/__init__.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/splitters/base_splitter.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/splitters/k_folds.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/splitters/last_n_splitter.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/splitters/new_users_splitter.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/splitters/random_splitter.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/splitters/ratio_splitter.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/splitters/time_splitter.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/splitters/two_stage_splitter.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/utils/dataframe_bucketizer.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/utils/distributions.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/utils/model_handler.py +0 -0
- {replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/utils/time.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: replay-rec
|
|
3
|
-
Version: 0.18.0rc0
|
|
3
|
+
Version: 0.18.1rc0
|
|
4
4
|
Summary: RecSys Library
|
|
5
5
|
Home-page: https://sb-ai-lab.github.io/RePlay/
|
|
6
6
|
License: Apache-2.0
|
|
@@ -21,6 +21,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
21
21
|
Provides-Extra: all
|
|
22
22
|
Provides-Extra: spark
|
|
23
23
|
Provides-Extra: torch
|
|
24
|
+
Provides-Extra: torch-openvino
|
|
24
25
|
Requires-Dist: d3rlpy (>=2.0.4,<3.0.0)
|
|
25
26
|
Requires-Dist: fixed-install-nmslib (==2.1.2)
|
|
26
27
|
Requires-Dist: gym (>=0.26.0,<0.27.0)
|
|
@@ -28,10 +29,12 @@ Requires-Dist: hnswlib (>=0.7.0,<0.8.0)
|
|
|
28
29
|
Requires-Dist: implicit (>=0.7.0,<0.8.0)
|
|
29
30
|
Requires-Dist: lightautoml (>=0.3.1,<0.4.0)
|
|
30
31
|
Requires-Dist: lightfm (==1.17)
|
|
31
|
-
Requires-Dist: lightning (>=2.0.2,<=2.4.0) ; extra == "torch" or extra == "all"
|
|
32
|
+
Requires-Dist: lightning (>=2.0.2,<=2.4.0) ; extra == "torch" or extra == "torch-openvino" or extra == "all"
|
|
32
33
|
Requires-Dist: llvmlite (>=0.32.1)
|
|
33
34
|
Requires-Dist: numba (>=0.50)
|
|
34
35
|
Requires-Dist: numpy (>=1.20.0)
|
|
36
|
+
Requires-Dist: onnx (>=1.16.2,<1.17.0) ; extra == "torch-openvino" or extra == "all"
|
|
37
|
+
Requires-Dist: openvino (>=2024.3.0,<2024.4.0) ; extra == "torch-openvino" or extra == "all"
|
|
35
38
|
Requires-Dist: optuna (>=3.2.0,<3.3.0)
|
|
36
39
|
Requires-Dist: pandas (>=1.3.5,<=2.2.2)
|
|
37
40
|
Requires-Dist: polars (>=1.0.0,<1.1.0)
|
|
@@ -39,11 +42,11 @@ Requires-Dist: psutil (>=6.0.0,<6.1.0)
|
|
|
39
42
|
Requires-Dist: pyarrow (>=12.0.1)
|
|
40
43
|
Requires-Dist: pyspark (>=3.0,<3.5) ; (python_full_version >= "3.8.1" and python_version < "3.11") and (extra == "spark" or extra == "all")
|
|
41
44
|
Requires-Dist: pyspark (>=3.4,<3.5) ; (python_version >= "3.11" and python_version < "3.12") and (extra == "spark" or extra == "all")
|
|
42
|
-
Requires-Dist: pytorch-ranger (>=0.1.1,<0.2.0) ; extra == "torch" or extra == "all"
|
|
45
|
+
Requires-Dist: pytorch-ranger (>=0.1.1,<0.2.0) ; extra == "torch" or extra == "torch-openvino" or extra == "all"
|
|
43
46
|
Requires-Dist: sb-obp (>=0.5.8,<0.6.0)
|
|
44
47
|
Requires-Dist: scikit-learn (>=1.0.2,<2.0.0)
|
|
45
48
|
Requires-Dist: scipy (>=1.8.1,<2.0.0)
|
|
46
|
-
Requires-Dist: torch (>=1.8,<=2.4.0) ; extra == "torch" or extra == "all"
|
|
49
|
+
Requires-Dist: torch (>=1.8,<=2.4.0) ; extra == "torch" or extra == "torch-openvino" or extra == "all"
|
|
47
50
|
Project-URL: Repository, https://github.com/sb-ai-lab/RePlay
|
|
48
51
|
Description-Content-Type: text/markdown
|
|
49
52
|
|
|
@@ -52,11 +55,15 @@ Description-Content-Type: text/markdown
|
|
|
52
55
|
|
|
53
56
|
[](https://github.com/sb-ai-lab/RePlay/blob/main/LICENSE)
|
|
54
57
|
[](https://pypi.org/project/replay-rec)
|
|
58
|
+
[](https://sb-ai-lab.github.io/RePlay/)
|
|
55
59
|
[](https://pypistats.org/packages/replay-rec)
|
|
56
60
|
<br>
|
|
57
61
|
[](https://github.com/sb-ai-lab/RePlay/actions/workflows/main.yml?query=branch%3Amain)
|
|
62
|
+
[](https://github.com/astral-sh/ruff)
|
|
63
|
+
[](https://pypi.org/project/replay-rec)
|
|
58
64
|
[](https://github.com/sb-ai-lab/RePlay/discussions)
|
|
59
65
|
|
|
66
|
+
|
|
60
67
|
RePlay is an advanced framework designed to facilitate the development and evaluation of recommendation systems. It provides a robust set of tools covering the entire lifecycle of a recommendation system pipeline:
|
|
61
68
|
|
|
62
69
|
## 🚀 Features:
|
|
@@ -71,61 +78,25 @@ RePlay is an advanced framework designed to facilitate the development and evalu
|
|
|
71
78
|
1. **Diverse Hardware Support:** Compatible with various hardware configurations including CPU, GPU, Multi-GPU.
|
|
72
79
|
2. **Cluster Computing Integration:** Integrating with PySpark for distributed computing, enabling scalability for large-scale recommendation systems.
|
|
73
80
|
|
|
74
|
-
## 📖 Documentation is available [here](https://sb-ai-lab.github.io/RePlay/).
|
|
75
|
-
|
|
76
81
|
<a name="toc"></a>
|
|
77
82
|
# Table of Contents
|
|
78
83
|
|
|
79
|
-
* [Installation](#installation)
|
|
80
84
|
* [Quickstart](#quickstart)
|
|
85
|
+
* [Installation](#installation)
|
|
81
86
|
* [Resources](#examples)
|
|
82
87
|
* [Contributing to RePlay](#contributing)
|
|
83
88
|
|
|
84
89
|
|
|
85
|
-
<a name="installation"></a>
|
|
86
|
-
## 🔧 Installation
|
|
87
|
-
|
|
88
|
-
Installation via `pip` package manager is recommended by default:
|
|
89
|
-
|
|
90
|
-
```bash
|
|
91
|
-
pip install replay-rec
|
|
92
|
-
```
|
|
93
|
-
|
|
94
|
-
In this case it will be installed the `core` package without `PySpark` and `PyTorch` dependencies.
|
|
95
|
-
Also `experimental` submodule will not be installed.
|
|
96
|
-
|
|
97
|
-
To install `experimental` submodule please specify the version with `rc0` suffix.
|
|
98
|
-
For example:
|
|
99
|
-
|
|
100
|
-
```bash
|
|
101
|
-
pip install replay-rec==XX.YY.ZZrc0
|
|
102
|
-
```
|
|
103
|
-
|
|
104
|
-
### Extras
|
|
105
|
-
|
|
106
|
-
In addition to the core package, several extras are also provided, including:
|
|
107
|
-
- `[spark]`: Install PySpark functionality
|
|
108
|
-
- `[torch]`: Install PyTorch and Lightning functionality
|
|
109
|
-
- `[all]`: `[spark]` `[torch]`
|
|
90
|
+
<a name="quickstart"></a>
|
|
91
|
+
## 📈 Quickstart
|
|
110
92
|
|
|
111
|
-
Example:
|
|
112
93
|
```bash
|
|
113
|
-
|
|
114
|
-
pip install replay-rec[spark]
|
|
115
|
-
|
|
116
|
-
# Install package with experimental submodule and PySpark dependency
|
|
117
|
-
pip install replay-rec[spark]==XX.YY.ZZrc0
|
|
94
|
+
pip install replay-rec[all]
|
|
118
95
|
```
|
|
119
96
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
If you encounter an error during RePlay installation, check the [troubleshooting](https://sb-ai-lab.github.io/RePlay/pages/installation.html#troubleshooting) guide.
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
<a name="quickstart"></a>
|
|
126
|
-
## 📈 Quickstart (PySpark-based)
|
|
127
|
-
|
|
97
|
+
Pyspark-based model and [fast](https://github.com/sb-ai-lab/RePlay/blob/main/examples/11_sasrec_dataframes_comparison.ipynb) polars-based data preprocessing:
|
|
128
98
|
```python
|
|
99
|
+
from polars import from_pandas
|
|
129
100
|
from rs_datasets import MovieLens
|
|
130
101
|
|
|
131
102
|
from replay.data import Dataset, FeatureHint, FeatureInfo, FeatureSchema, FeatureType
|
|
@@ -139,10 +110,10 @@ from replay.splitters import RatioSplitter
|
|
|
139
110
|
spark = State().session
|
|
140
111
|
|
|
141
112
|
ml_1m = MovieLens("1m")
|
|
142
|
-
K=10
|
|
113
|
+
K = 10
|
|
143
114
|
|
|
144
|
-
# data
|
|
145
|
-
interactions =
|
|
115
|
+
# convert data to polars
|
|
116
|
+
interactions = from_pandas(ml_1m.ratings)
|
|
146
117
|
|
|
147
118
|
# data splitting
|
|
148
119
|
splitter = RatioSplitter(
|
|
@@ -156,7 +127,7 @@ splitter = RatioSplitter(
|
|
|
156
127
|
)
|
|
157
128
|
train, test = splitter.split(interactions)
|
|
158
129
|
|
|
159
|
-
#
|
|
130
|
+
# datasets creation
|
|
160
131
|
feature_schema = FeatureSchema(
|
|
161
132
|
[
|
|
162
133
|
FeatureInfo(
|
|
@@ -182,20 +153,18 @@ feature_schema = FeatureSchema(
|
|
|
182
153
|
]
|
|
183
154
|
)
|
|
184
155
|
|
|
185
|
-
train_dataset = Dataset(
|
|
186
|
-
|
|
187
|
-
interactions=train,
|
|
188
|
-
)
|
|
189
|
-
test_dataset = Dataset(
|
|
190
|
-
feature_schema=feature_schema,
|
|
191
|
-
interactions=test,
|
|
192
|
-
)
|
|
156
|
+
train_dataset = Dataset(feature_schema=feature_schema, interactions=train)
|
|
157
|
+
test_dataset = Dataset(feature_schema=feature_schema, interactions=test)
|
|
193
158
|
|
|
194
159
|
# data encoding
|
|
195
160
|
encoder = DatasetLabelEncoder()
|
|
196
161
|
train_dataset = encoder.fit_transform(train_dataset)
|
|
197
162
|
test_dataset = encoder.transform(test_dataset)
|
|
198
163
|
|
|
164
|
+
# convert datasets to spark
|
|
165
|
+
train_dataset.to_spark()
|
|
166
|
+
test_dataset.to_spark()
|
|
167
|
+
|
|
199
168
|
# model training
|
|
200
169
|
model = ItemKNN()
|
|
201
170
|
model.fit(train_dataset)
|
|
@@ -222,6 +191,44 @@ metrics.add_result("ItemKNN", recs)
|
|
|
222
191
|
print(metrics.results)
|
|
223
192
|
```
|
|
224
193
|
|
|
194
|
+
<a name="installation"></a>
|
|
195
|
+
## 🔧 Installation
|
|
196
|
+
|
|
197
|
+
Installation via `pip` package manager is recommended by default:
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
pip install replay-rec
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
In this case it will be installed the `core` package without `PySpark` and `PyTorch` dependencies.
|
|
204
|
+
Also `experimental` submodule will not be installed.
|
|
205
|
+
|
|
206
|
+
To install `experimental` submodule please specify the version with `rc0` suffix.
|
|
207
|
+
For example:
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
pip install replay-rec==XX.YY.ZZrc0
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Extras
|
|
214
|
+
|
|
215
|
+
In addition to the core package, several extras are also provided, including:
|
|
216
|
+
- `[spark]`: Install PySpark functionality
|
|
217
|
+
- `[torch]`: Install PyTorch and Lightning functionality
|
|
218
|
+
- `[all]`: `[spark]` `[torch]`
|
|
219
|
+
|
|
220
|
+
Example:
|
|
221
|
+
```bash
|
|
222
|
+
# Install core package with PySpark dependency
|
|
223
|
+
pip install replay-rec[spark]
|
|
224
|
+
|
|
225
|
+
# Install package with experimental submodule and PySpark dependency
|
|
226
|
+
pip install replay-rec[spark]==XX.YY.ZZrc0
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
To build RePlay from sources please use the [instruction](CONTRIBUTING.md#installing-from-the-source).
|
|
230
|
+
|
|
231
|
+
|
|
225
232
|
<a name="examples"></a>
|
|
226
233
|
## 📑 Resources
|
|
227
234
|
|
|
@@ -234,14 +241,19 @@ print(metrics.results)
|
|
|
234
241
|
6. [06_item2item_recommendations.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/06_item2item_recommendations.ipynb) - Item to Item recommendations example.
|
|
235
242
|
7. [07_filters.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/07_filters.ipynb) - An example of using filters.
|
|
236
243
|
8. [08_recommending_for_categories.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/08_recommending_for_categories.ipynb) - An example of recommendation for product categories.
|
|
237
|
-
9. [09_sasrec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/09_sasrec_example.ipynb) - An example of using
|
|
238
|
-
|
|
244
|
+
9. [09_sasrec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/09_sasrec_example.ipynb) - An example of using transformer-based SASRec model to generate recommendations.
|
|
245
|
+
10. [10_bert4rec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/10_bert4rec_example.ipynb) - An example of using transformer-based BERT4Rec model to generate recommendations.
|
|
246
|
+
11. [11_sasrec_dataframes_comparison.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/11_sasrec_dataframes_comparison.ipynb) - speed comparison of using different frameworks (pandas, polars, pyspark) for data processing during SASRec training.
|
|
247
|
+
12. [12_neural_ts_exp.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/12_neural_ts_exp.ipynb) - An example of using Neural Thompson Sampling bandit model (based on Wide&Deep architecture).
|
|
248
|
+
13. [13_personalized_bandit_comparison.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/13_personalized_bandit_comparison.ipynb) - A comparison of context-free and contextual bandit models.
|
|
249
|
+
14. [14_hierarchical_recommender.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/14_hierarchical_recommender.ipynb) - An example of using HierarchicalRecommender with user-disjoint LinUCB.
|
|
239
250
|
|
|
240
251
|
### Videos and papers
|
|
241
252
|
* **Video guides**:
|
|
242
253
|
- [Replay for offline recommendations, AI Journey 2021](https://www.youtube.com/watch?v=ejQZKGAG0xs)
|
|
243
254
|
|
|
244
255
|
* **Research papers**:
|
|
256
|
+
- [RePlay: a Recommendation Framework for Experimentation and Production Use](https://arxiv.org/abs/2409.07272) Alexey Vasilev, Anna Volodkevich, Denis Kulandin, Tatiana Bysheva, Anton Klenitskiy. In The 18th ACM Conference on Recommender Systems (RecSys '24)
|
|
245
257
|
- [Turning Dross Into Gold Loss: is BERT4Rec really better than SASRec?](https://doi.org/10.1145/3604915.3610644) Anton Klenitskiy, Alexey Vasilev. In The 17th ACM Conference on Recommender Systems (RecSys '23)
|
|
246
258
|
- [The Long Tail of Context: Does it Exist and Matter?](https://arxiv.org/abs/2210.01023). Konstantin Bauman, Alexey Vasilev, Alexander Tuzhilin. In Workshop on Context-Aware Recommender Systems (CARS) (RecSys '22)
|
|
247
259
|
- [Multiobjective Evaluation of Reinforcement Learning Based Recommender Systems](https://doi.org/10.1145/3523227.3551485). Alexey Grishanov, Anastasia Ianina, Konstantin Vorontsov. In The 16th ACM Conference on Recommender Systems (RecSys '22)
|
|
@@ -252,3 +264,4 @@ print(metrics.results)
|
|
|
252
264
|
|
|
253
265
|
We welcome community contributions. For details please check our [contributing guidelines](CONTRIBUTING.md).
|
|
254
266
|
|
|
267
|
+
|
|
@@ -3,11 +3,15 @@
|
|
|
3
3
|
|
|
4
4
|
[](https://github.com/sb-ai-lab/RePlay/blob/main/LICENSE)
|
|
5
5
|
[](https://pypi.org/project/replay-rec)
|
|
6
|
+
[](https://sb-ai-lab.github.io/RePlay/)
|
|
6
7
|
[](https://pypistats.org/packages/replay-rec)
|
|
7
8
|
<br>
|
|
8
9
|
[](https://github.com/sb-ai-lab/RePlay/actions/workflows/main.yml?query=branch%3Amain)
|
|
10
|
+
[](https://github.com/astral-sh/ruff)
|
|
11
|
+
[](https://pypi.org/project/replay-rec)
|
|
9
12
|
[](https://github.com/sb-ai-lab/RePlay/discussions)
|
|
10
13
|
|
|
14
|
+
|
|
11
15
|
RePlay is an advanced framework designed to facilitate the development and evaluation of recommendation systems. It provides a robust set of tools covering the entire lifecycle of a recommendation system pipeline:
|
|
12
16
|
|
|
13
17
|
## 🚀 Features:
|
|
@@ -22,61 +26,25 @@ RePlay is an advanced framework designed to facilitate the development and evalu
|
|
|
22
26
|
1. **Diverse Hardware Support:** Compatible with various hardware configurations including CPU, GPU, Multi-GPU.
|
|
23
27
|
2. **Cluster Computing Integration:** Integrating with PySpark for distributed computing, enabling scalability for large-scale recommendation systems.
|
|
24
28
|
|
|
25
|
-
## 📖 Documentation is available [here](https://sb-ai-lab.github.io/RePlay/).
|
|
26
|
-
|
|
27
29
|
<a name="toc"></a>
|
|
28
30
|
# Table of Contents
|
|
29
31
|
|
|
30
|
-
* [Installation](#installation)
|
|
31
32
|
* [Quickstart](#quickstart)
|
|
33
|
+
* [Installation](#installation)
|
|
32
34
|
* [Resources](#examples)
|
|
33
35
|
* [Contributing to RePlay](#contributing)
|
|
34
36
|
|
|
35
37
|
|
|
36
|
-
<a name="
|
|
37
|
-
##
|
|
38
|
-
|
|
39
|
-
Installation via `pip` package manager is recommended by default:
|
|
40
|
-
|
|
41
|
-
```bash
|
|
42
|
-
pip install replay-rec
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
In this case it will be installed the `core` package without `PySpark` and `PyTorch` dependencies.
|
|
46
|
-
Also `experimental` submodule will not be installed.
|
|
47
|
-
|
|
48
|
-
To install `experimental` submodule please specify the version with `rc0` suffix.
|
|
49
|
-
For example:
|
|
50
|
-
|
|
51
|
-
```bash
|
|
52
|
-
pip install replay-rec==XX.YY.ZZrc0
|
|
53
|
-
```
|
|
54
|
-
|
|
55
|
-
### Extras
|
|
56
|
-
|
|
57
|
-
In addition to the core package, several extras are also provided, including:
|
|
58
|
-
- `[spark]`: Install PySpark functionality
|
|
59
|
-
- `[torch]`: Install PyTorch and Lightning functionality
|
|
60
|
-
- `[all]`: `[spark]` `[torch]`
|
|
38
|
+
<a name="quickstart"></a>
|
|
39
|
+
## 📈 Quickstart
|
|
61
40
|
|
|
62
|
-
Example:
|
|
63
41
|
```bash
|
|
64
|
-
|
|
65
|
-
pip install replay-rec[spark]
|
|
66
|
-
|
|
67
|
-
# Install package with experimental submodule and PySpark dependency
|
|
68
|
-
pip install replay-rec[spark]==XX.YY.ZZrc0
|
|
42
|
+
pip install replay-rec[all]
|
|
69
43
|
```
|
|
70
44
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
If you encounter an error during RePlay installation, check the [troubleshooting](https://sb-ai-lab.github.io/RePlay/pages/installation.html#troubleshooting) guide.
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
<a name="quickstart"></a>
|
|
77
|
-
## 📈 Quickstart (PySpark-based)
|
|
78
|
-
|
|
45
|
+
Pyspark-based model and [fast](https://github.com/sb-ai-lab/RePlay/blob/main/examples/11_sasrec_dataframes_comparison.ipynb) polars-based data preprocessing:
|
|
79
46
|
```python
|
|
47
|
+
from polars import from_pandas
|
|
80
48
|
from rs_datasets import MovieLens
|
|
81
49
|
|
|
82
50
|
from replay.data import Dataset, FeatureHint, FeatureInfo, FeatureSchema, FeatureType
|
|
@@ -90,10 +58,10 @@ from replay.splitters import RatioSplitter
|
|
|
90
58
|
spark = State().session
|
|
91
59
|
|
|
92
60
|
ml_1m = MovieLens("1m")
|
|
93
|
-
K=10
|
|
61
|
+
K = 10
|
|
94
62
|
|
|
95
|
-
# data
|
|
96
|
-
interactions =
|
|
63
|
+
# convert data to polars
|
|
64
|
+
interactions = from_pandas(ml_1m.ratings)
|
|
97
65
|
|
|
98
66
|
# data splitting
|
|
99
67
|
splitter = RatioSplitter(
|
|
@@ -107,7 +75,7 @@ splitter = RatioSplitter(
|
|
|
107
75
|
)
|
|
108
76
|
train, test = splitter.split(interactions)
|
|
109
77
|
|
|
110
|
-
#
|
|
78
|
+
# datasets creation
|
|
111
79
|
feature_schema = FeatureSchema(
|
|
112
80
|
[
|
|
113
81
|
FeatureInfo(
|
|
@@ -133,20 +101,18 @@ feature_schema = FeatureSchema(
|
|
|
133
101
|
]
|
|
134
102
|
)
|
|
135
103
|
|
|
136
|
-
train_dataset = Dataset(
|
|
137
|
-
|
|
138
|
-
interactions=train,
|
|
139
|
-
)
|
|
140
|
-
test_dataset = Dataset(
|
|
141
|
-
feature_schema=feature_schema,
|
|
142
|
-
interactions=test,
|
|
143
|
-
)
|
|
104
|
+
train_dataset = Dataset(feature_schema=feature_schema, interactions=train)
|
|
105
|
+
test_dataset = Dataset(feature_schema=feature_schema, interactions=test)
|
|
144
106
|
|
|
145
107
|
# data encoding
|
|
146
108
|
encoder = DatasetLabelEncoder()
|
|
147
109
|
train_dataset = encoder.fit_transform(train_dataset)
|
|
148
110
|
test_dataset = encoder.transform(test_dataset)
|
|
149
111
|
|
|
112
|
+
# convert datasets to spark
|
|
113
|
+
train_dataset.to_spark()
|
|
114
|
+
test_dataset.to_spark()
|
|
115
|
+
|
|
150
116
|
# model training
|
|
151
117
|
model = ItemKNN()
|
|
152
118
|
model.fit(train_dataset)
|
|
@@ -173,6 +139,44 @@ metrics.add_result("ItemKNN", recs)
|
|
|
173
139
|
print(metrics.results)
|
|
174
140
|
```
|
|
175
141
|
|
|
142
|
+
<a name="installation"></a>
|
|
143
|
+
## 🔧 Installation
|
|
144
|
+
|
|
145
|
+
Installation via `pip` package manager is recommended by default:
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
pip install replay-rec
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
In this case it will be installed the `core` package without `PySpark` and `PyTorch` dependencies.
|
|
152
|
+
Also `experimental` submodule will not be installed.
|
|
153
|
+
|
|
154
|
+
To install `experimental` submodule please specify the version with `rc0` suffix.
|
|
155
|
+
For example:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
pip install replay-rec==XX.YY.ZZrc0
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Extras
|
|
162
|
+
|
|
163
|
+
In addition to the core package, several extras are also provided, including:
|
|
164
|
+
- `[spark]`: Install PySpark functionality
|
|
165
|
+
- `[torch]`: Install PyTorch and Lightning functionality
|
|
166
|
+
- `[all]`: `[spark]` `[torch]`
|
|
167
|
+
|
|
168
|
+
Example:
|
|
169
|
+
```bash
|
|
170
|
+
# Install core package with PySpark dependency
|
|
171
|
+
pip install replay-rec[spark]
|
|
172
|
+
|
|
173
|
+
# Install package with experimental submodule and PySpark dependency
|
|
174
|
+
pip install replay-rec[spark]==XX.YY.ZZrc0
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
To build RePlay from sources please use the [instruction](CONTRIBUTING.md#installing-from-the-source).
|
|
178
|
+
|
|
179
|
+
|
|
176
180
|
<a name="examples"></a>
|
|
177
181
|
## 📑 Resources
|
|
178
182
|
|
|
@@ -185,14 +189,19 @@ print(metrics.results)
|
|
|
185
189
|
6. [06_item2item_recommendations.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/06_item2item_recommendations.ipynb) - Item to Item recommendations example.
|
|
186
190
|
7. [07_filters.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/07_filters.ipynb) - An example of using filters.
|
|
187
191
|
8. [08_recommending_for_categories.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/08_recommending_for_categories.ipynb) - An example of recommendation for product categories.
|
|
188
|
-
9. [09_sasrec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/09_sasrec_example.ipynb) - An example of using
|
|
189
|
-
|
|
192
|
+
9. [09_sasrec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/09_sasrec_example.ipynb) - An example of using transformer-based SASRec model to generate recommendations.
|
|
193
|
+
10. [10_bert4rec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/10_bert4rec_example.ipynb) - An example of using transformer-based BERT4Rec model to generate recommendations.
|
|
194
|
+
11. [11_sasrec_dataframes_comparison.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/11_sasrec_dataframes_comparison.ipynb) - speed comparison of using different frameworks (pandas, polars, pyspark) for data processing during SASRec training.
|
|
195
|
+
12. [12_neural_ts_exp.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/12_neural_ts_exp.ipynb) - An example of using Neural Thompson Sampling bandit model (based on Wide&Deep architecture).
|
|
196
|
+
13. [13_personalized_bandit_comparison.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/13_personalized_bandit_comparison.ipynb) - A comparison of context-free and contextual bandit models.
|
|
197
|
+
14. [14_hierarchical_recommender.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/14_hierarchical_recommender.ipynb) - An example of using HierarchicalRecommender with user-disjoint LinUCB.
|
|
190
198
|
|
|
191
199
|
### Videos and papers
|
|
192
200
|
* **Video guides**:
|
|
193
201
|
- [Replay for offline recommendations, AI Journey 2021](https://www.youtube.com/watch?v=ejQZKGAG0xs)
|
|
194
202
|
|
|
195
203
|
* **Research papers**:
|
|
204
|
+
- [RePlay: a Recommendation Framework for Experimentation and Production Use](https://arxiv.org/abs/2409.07272) Alexey Vasilev, Anna Volodkevich, Denis Kulandin, Tatiana Bysheva, Anton Klenitskiy. In The 18th ACM Conference on Recommender Systems (RecSys '24)
|
|
196
205
|
- [Turning Dross Into Gold Loss: is BERT4Rec really better than SASRec?](https://doi.org/10.1145/3604915.3610644) Anton Klenitskiy, Alexey Vasilev. In The 17th ACM Conference on Recommender Systems (RecSys '23)
|
|
197
206
|
- [The Long Tail of Context: Does it Exist and Matter?](https://arxiv.org/abs/2210.01023). Konstantin Bauman, Alexey Vasilev, Alexander Tuzhilin. In Workshop on Context-Aware Recommender Systems (CARS) (RecSys '22)
|
|
198
207
|
- [Multiobjective Evaluation of Reinforcement Learning Based Recommender Systems](https://doi.org/10.1145/3523227.3551485). Alexey Grishanov, Anastasia Ianina, Konstantin Vorontsov. In The 16th ACM Conference on Recommender Systems (RecSys '22)
|
|
@@ -202,3 +211,4 @@ print(metrics.results)
|
|
|
202
211
|
## 💡 Contributing to RePlay
|
|
203
212
|
|
|
204
213
|
We welcome community contributions. For details please check our [contributing guidelines](CONTRIBUTING.md).
|
|
214
|
+
|
|
@@ -40,7 +40,7 @@ classifiers = [
|
|
|
40
40
|
exclude = [
|
|
41
41
|
"replay/conftest.py",
|
|
42
42
|
]
|
|
43
|
-
version = "0.18.
|
|
43
|
+
version = "0.18.1.preview"
|
|
44
44
|
|
|
45
45
|
[tool.poetry.dependencies]
|
|
46
46
|
python = ">=3.8.1, <3.12"
|
|
@@ -52,6 +52,8 @@ scipy = "^1.8.1"
|
|
|
52
52
|
psutil = "~6.0.0"
|
|
53
53
|
scikit-learn = "^1.0.2"
|
|
54
54
|
pyarrow = ">=12.0.1"
|
|
55
|
+
openvino = {version = "~2024.3.0", optional = true}
|
|
56
|
+
onnx = {version = "~1.16.2", optional = true}
|
|
55
57
|
fixed-install-nmslib = "2.1.2"
|
|
56
58
|
hnswlib = "^0.7.0"
|
|
57
59
|
pyspark = [
|
|
@@ -73,7 +75,8 @@ gym = "^0.26.0"
|
|
|
73
75
|
[tool.poetry.extras]
|
|
74
76
|
spark = ["pyspark"]
|
|
75
77
|
torch = ["torch", "pytorch-ranger", "lightning"]
|
|
76
|
-
|
|
78
|
+
torch-openvino = ["torch", "pytorch-ranger", "lightning", "openvino", "onnx"]
|
|
79
|
+
all = ["pyspark", "torch", "pytorch-ranger", "lightning", "openvino", "onnx"]
|
|
77
80
|
|
|
78
81
|
[tool.poetry.group.dev.dependencies]
|
|
79
82
|
jupyter = "~1.0.0"
|
|
@@ -92,10 +95,11 @@ myst-parser = "1.0.0"
|
|
|
92
95
|
ghp-import = "2.1.0"
|
|
93
96
|
docutils = "0.16"
|
|
94
97
|
data-science-types = "0.2.23"
|
|
98
|
+
filelock = "~3.14.0"
|
|
95
99
|
|
|
96
100
|
[tool.poetry-dynamic-versioning]
|
|
97
101
|
enable = false
|
|
98
|
-
format-jinja = """0.18.
|
|
102
|
+
format-jinja = """0.18.1{{ env['PACKAGE_SUFFIX'] }}"""
|
|
99
103
|
vcs = "git"
|
|
100
104
|
|
|
101
105
|
[tool.ruff]
|
|
@@ -458,13 +458,23 @@ class Dataset:
|
|
|
458
458
|
if feature.feature_hint in [FeatureHint.ITEM_ID, FeatureHint.QUERY_ID]:
|
|
459
459
|
return nunique(self._ids_feature_map[feature.feature_hint], column)
|
|
460
460
|
assert feature.feature_source
|
|
461
|
+
if feature.feature_type == FeatureType.CATEGORICAL_LIST:
|
|
462
|
+
if self.is_spark:
|
|
463
|
+
data = (
|
|
464
|
+
self._feature_source_map[feature.feature_source]
|
|
465
|
+
.select(column)
|
|
466
|
+
.withColumn(column, sf.explode(column))
|
|
467
|
+
)
|
|
468
|
+
else:
|
|
469
|
+
data = self._feature_source_map[feature.feature_source][[column]].explode(column)
|
|
470
|
+
return nunique(data, column)
|
|
461
471
|
return nunique(self._feature_source_map[feature.feature_source], column)
|
|
462
472
|
|
|
463
473
|
return callback
|
|
464
474
|
|
|
465
475
|
def _set_cardinality(self, features_list: Sequence[FeatureInfo]) -> None:
|
|
466
476
|
for feature in features_list:
|
|
467
|
-
if feature.feature_type
|
|
477
|
+
if feature.feature_type in [FeatureType.CATEGORICAL, FeatureType.CATEGORICAL_LIST]:
|
|
468
478
|
feature._set_cardinality_callback(self._get_cardinality(feature))
|
|
469
479
|
|
|
470
480
|
def _fill_feature_schema(self, feature_schema: FeatureSchema) -> FeatureSchema:
|
|
@@ -581,6 +591,7 @@ class Dataset:
|
|
|
581
591
|
data: DataFrameLike,
|
|
582
592
|
column: str,
|
|
583
593
|
source: FeatureSource,
|
|
594
|
+
feature_type: FeatureType,
|
|
584
595
|
cardinality: Optional[int],
|
|
585
596
|
) -> None:
|
|
586
597
|
"""
|
|
@@ -593,6 +604,16 @@ class Dataset:
|
|
|
593
604
|
Option: Keep this criterion, but suggest the user to disable the check if he understands
|
|
594
605
|
that the criterion will not pass.
|
|
595
606
|
"""
|
|
607
|
+
if feature_type == FeatureType.CATEGORICAL_LIST: # explode column if list
|
|
608
|
+
data = data.withColumn(column, sf.explode(column)) if self.is_spark else data[[column]].explode(column)
|
|
609
|
+
|
|
610
|
+
if self.is_pandas:
|
|
611
|
+
try:
|
|
612
|
+
data[column] = data[column].astype(int)
|
|
613
|
+
except Exception:
|
|
614
|
+
msg = f"IDs in {source.name}.{column} are not encoded. They are not int."
|
|
615
|
+
raise ValueError(msg)
|
|
616
|
+
|
|
596
617
|
if self.is_pandas:
|
|
597
618
|
is_int = np.issubdtype(dict(data.dtypes)[column], int)
|
|
598
619
|
elif self.is_spark:
|
|
@@ -632,6 +653,7 @@ class Dataset:
|
|
|
632
653
|
self.interactions,
|
|
633
654
|
feature.column,
|
|
634
655
|
FeatureSource.INTERACTIONS,
|
|
656
|
+
feature.feature_type,
|
|
635
657
|
feature.cardinality,
|
|
636
658
|
)
|
|
637
659
|
if self.item_features is not None:
|
|
@@ -639,6 +661,7 @@ class Dataset:
|
|
|
639
661
|
self.item_features,
|
|
640
662
|
feature.column,
|
|
641
663
|
FeatureSource.ITEM_FEATURES,
|
|
664
|
+
feature.feature_type,
|
|
642
665
|
feature.cardinality,
|
|
643
666
|
)
|
|
644
667
|
elif feature.feature_hint == FeatureHint.QUERY_ID:
|
|
@@ -646,6 +669,7 @@ class Dataset:
|
|
|
646
669
|
self.interactions,
|
|
647
670
|
feature.column,
|
|
648
671
|
FeatureSource.INTERACTIONS,
|
|
672
|
+
feature.feature_type,
|
|
649
673
|
feature.cardinality,
|
|
650
674
|
)
|
|
651
675
|
if self.query_features is not None:
|
|
@@ -653,6 +677,7 @@ class Dataset:
|
|
|
653
677
|
self.query_features,
|
|
654
678
|
feature.column,
|
|
655
679
|
FeatureSource.QUERY_FEATURES,
|
|
680
|
+
feature.feature_type,
|
|
656
681
|
feature.cardinality,
|
|
657
682
|
)
|
|
658
683
|
else:
|
|
@@ -661,6 +686,7 @@ class Dataset:
|
|
|
661
686
|
data,
|
|
662
687
|
feature.column,
|
|
663
688
|
feature.feature_source,
|
|
689
|
+
feature.feature_type,
|
|
664
690
|
feature.cardinality,
|
|
665
691
|
)
|
|
666
692
|
|
{replay_rec-0.18.0rc0 → replay_rec-0.18.1rc0}/replay/data/dataset_utils/dataset_label_encoder.py
RENAMED
|
@@ -8,8 +8,8 @@ Contains classes for encoding categorical data
|
|
|
8
8
|
import warnings
|
|
9
9
|
from typing import Dict, Iterable, Iterator, Optional, Sequence, Set, Union
|
|
10
10
|
|
|
11
|
-
from replay.data import Dataset, FeatureHint, FeatureSchema, FeatureSource
|
|
12
|
-
from replay.preprocessing import LabelEncoder, LabelEncodingRule
|
|
11
|
+
from replay.data import Dataset, FeatureHint, FeatureSchema, FeatureSource, FeatureType
|
|
12
|
+
from replay.preprocessing import LabelEncoder, LabelEncodingRule, SequenceEncodingRule
|
|
13
13
|
from replay.preprocessing.label_encoder import HandleUnknownStrategies
|
|
14
14
|
|
|
15
15
|
|
|
@@ -62,7 +62,10 @@ class DatasetLabelEncoder:
|
|
|
62
62
|
|
|
63
63
|
self._fill_features_columns(dataset.feature_schema)
|
|
64
64
|
for column, feature_info in dataset.feature_schema.categorical_features.items():
|
|
65
|
-
|
|
65
|
+
encoding_rule_class = (
|
|
66
|
+
SequenceEncodingRule if feature_info.feature_type == FeatureType.CATEGORICAL_LIST else LabelEncodingRule
|
|
67
|
+
)
|
|
68
|
+
encoding_rule = encoding_rule_class(
|
|
66
69
|
column, handle_unknown=self._handle_unknown_rule, default_value=self._default_value_rule
|
|
67
70
|
)
|
|
68
71
|
if feature_info.feature_hint == FeatureHint.QUERY_ID:
|