replay-rec 0.18.0rc0__tar.gz → 0.18.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/PKG-INFO +75 -70
  2. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/README.md +66 -56
  3. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/pyproject.toml +15 -18
  4. replay_rec-0.18.1/replay/__init__.py +3 -0
  5. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/data/dataset.py +27 -1
  6. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/data/dataset_utils/dataset_label_encoder.py +6 -3
  7. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/data/nn/schema.py +37 -16
  8. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/data/nn/sequence_tokenizer.py +313 -165
  9. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/data/nn/torch_sequential_dataset.py +17 -8
  10. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/data/nn/utils.py +14 -7
  11. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/data/schema.py +10 -6
  12. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/offline_metrics.py +2 -2
  13. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/__init__.py +1 -0
  14. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/base_rec.py +18 -21
  15. replay_rec-0.18.1/replay/models/lin_ucb.py +407 -0
  16. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/bert4rec/dataset.py +17 -4
  17. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/bert4rec/lightning.py +121 -54
  18. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/bert4rec/model.py +21 -0
  19. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/callbacks/prediction_callbacks.py +5 -1
  20. replay_rec-0.18.1/replay/models/nn/sequential/compiled/__init__.py +5 -0
  21. replay_rec-0.18.1/replay/models/nn/sequential/compiled/base_compiled_model.py +261 -0
  22. replay_rec-0.18.1/replay/models/nn/sequential/compiled/bert4rec_compiled.py +152 -0
  23. replay_rec-0.18.1/replay/models/nn/sequential/compiled/sasrec_compiled.py +145 -0
  24. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/postprocessors/postprocessors.py +27 -1
  25. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/sasrec/dataset.py +17 -1
  26. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/sasrec/lightning.py +126 -50
  27. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/sasrec/model.py +3 -4
  28. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/preprocessing/__init__.py +7 -1
  29. replay_rec-0.18.1/replay/preprocessing/discretizer.py +719 -0
  30. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/preprocessing/label_encoder.py +384 -52
  31. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/splitters/cold_user_random_splitter.py +1 -1
  32. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/utils/__init__.py +1 -0
  33. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/utils/common.py +7 -8
  34. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/utils/session_handler.py +3 -4
  35. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/utils/spark_utils.py +15 -1
  36. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/utils/types.py +8 -0
  37. replay_rec-0.18.0rc0/NOTICE +0 -41
  38. replay_rec-0.18.0rc0/replay/__init__.py +0 -3
  39. replay_rec-0.18.0rc0/replay/experimental/metrics/__init__.py +0 -62
  40. replay_rec-0.18.0rc0/replay/experimental/metrics/base_metric.py +0 -602
  41. replay_rec-0.18.0rc0/replay/experimental/metrics/coverage.py +0 -97
  42. replay_rec-0.18.0rc0/replay/experimental/metrics/experiment.py +0 -175
  43. replay_rec-0.18.0rc0/replay/experimental/metrics/hitrate.py +0 -26
  44. replay_rec-0.18.0rc0/replay/experimental/metrics/map.py +0 -30
  45. replay_rec-0.18.0rc0/replay/experimental/metrics/mrr.py +0 -18
  46. replay_rec-0.18.0rc0/replay/experimental/metrics/ncis_precision.py +0 -31
  47. replay_rec-0.18.0rc0/replay/experimental/metrics/ndcg.py +0 -49
  48. replay_rec-0.18.0rc0/replay/experimental/metrics/precision.py +0 -22
  49. replay_rec-0.18.0rc0/replay/experimental/metrics/recall.py +0 -25
  50. replay_rec-0.18.0rc0/replay/experimental/metrics/rocauc.py +0 -49
  51. replay_rec-0.18.0rc0/replay/experimental/metrics/surprisal.py +0 -90
  52. replay_rec-0.18.0rc0/replay/experimental/metrics/unexpectedness.py +0 -76
  53. replay_rec-0.18.0rc0/replay/experimental/models/__init__.py +0 -10
  54. replay_rec-0.18.0rc0/replay/experimental/models/admm_slim.py +0 -205
  55. replay_rec-0.18.0rc0/replay/experimental/models/base_neighbour_rec.py +0 -204
  56. replay_rec-0.18.0rc0/replay/experimental/models/base_rec.py +0 -1271
  57. replay_rec-0.18.0rc0/replay/experimental/models/base_torch_rec.py +0 -234
  58. replay_rec-0.18.0rc0/replay/experimental/models/cql.py +0 -454
  59. replay_rec-0.18.0rc0/replay/experimental/models/ddpg.py +0 -923
  60. replay_rec-0.18.0rc0/replay/experimental/models/dt4rec/dt4rec.py +0 -189
  61. replay_rec-0.18.0rc0/replay/experimental/models/dt4rec/gpt1.py +0 -401
  62. replay_rec-0.18.0rc0/replay/experimental/models/dt4rec/trainer.py +0 -127
  63. replay_rec-0.18.0rc0/replay/experimental/models/dt4rec/utils.py +0 -265
  64. replay_rec-0.18.0rc0/replay/experimental/models/extensions/spark_custom_models/als_extension.py +0 -792
  65. replay_rec-0.18.0rc0/replay/experimental/models/implicit_wrap.py +0 -131
  66. replay_rec-0.18.0rc0/replay/experimental/models/lightfm_wrap.py +0 -302
  67. replay_rec-0.18.0rc0/replay/experimental/models/mult_vae.py +0 -332
  68. replay_rec-0.18.0rc0/replay/experimental/models/neuromf.py +0 -406
  69. replay_rec-0.18.0rc0/replay/experimental/models/scala_als.py +0 -296
  70. replay_rec-0.18.0rc0/replay/experimental/nn/data/__init__.py +0 -1
  71. replay_rec-0.18.0rc0/replay/experimental/nn/data/schema_builder.py +0 -55
  72. replay_rec-0.18.0rc0/replay/experimental/preprocessing/__init__.py +0 -3
  73. replay_rec-0.18.0rc0/replay/experimental/preprocessing/data_preparator.py +0 -839
  74. replay_rec-0.18.0rc0/replay/experimental/preprocessing/padder.py +0 -229
  75. replay_rec-0.18.0rc0/replay/experimental/preprocessing/sequence_generator.py +0 -208
  76. replay_rec-0.18.0rc0/replay/experimental/scenarios/__init__.py +0 -1
  77. replay_rec-0.18.0rc0/replay/experimental/scenarios/obp_wrapper/__init__.py +0 -8
  78. replay_rec-0.18.0rc0/replay/experimental/scenarios/obp_wrapper/obp_optuna_objective.py +0 -74
  79. replay_rec-0.18.0rc0/replay/experimental/scenarios/obp_wrapper/replay_offline.py +0 -248
  80. replay_rec-0.18.0rc0/replay/experimental/scenarios/obp_wrapper/utils.py +0 -87
  81. replay_rec-0.18.0rc0/replay/experimental/scenarios/two_stages/reranker.py +0 -117
  82. replay_rec-0.18.0rc0/replay/experimental/scenarios/two_stages/two_stages_scenario.py +0 -757
  83. replay_rec-0.18.0rc0/replay/experimental/utils/logger.py +0 -24
  84. replay_rec-0.18.0rc0/replay/experimental/utils/model_handler.py +0 -186
  85. replay_rec-0.18.0rc0/replay/experimental/utils/session_handler.py +0 -44
  86. replay_rec-0.18.0rc0/replay/models/extensions/ann/__init__.py +0 -0
  87. replay_rec-0.18.0rc0/replay/models/extensions/ann/entities/__init__.py +0 -0
  88. replay_rec-0.18.0rc0/replay/models/extensions/ann/index_builders/__init__.py +0 -0
  89. replay_rec-0.18.0rc0/replay/models/extensions/ann/index_inferers/__init__.py +0 -0
  90. replay_rec-0.18.0rc0/replay/models/extensions/ann/index_stores/__init__.py +0 -0
  91. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/LICENSE +0 -0
  92. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/data/__init__.py +0 -0
  93. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/data/dataset_utils/__init__.py +0 -0
  94. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/data/nn/__init__.py +0 -0
  95. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/data/nn/sequential_dataset.py +0 -0
  96. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/data/spark_schema.py +0 -0
  97. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/__init__.py +0 -0
  98. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/base_metric.py +0 -0
  99. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/categorical_diversity.py +0 -0
  100. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/coverage.py +0 -0
  101. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/descriptors.py +0 -0
  102. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/experiment.py +0 -0
  103. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/hitrate.py +0 -0
  104. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/map.py +0 -0
  105. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/mrr.py +0 -0
  106. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/ndcg.py +0 -0
  107. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/novelty.py +0 -0
  108. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/precision.py +0 -0
  109. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/recall.py +0 -0
  110. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/rocauc.py +0 -0
  111. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/surprisal.py +0 -0
  112. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/torch_metrics_builder.py +0 -0
  113. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/metrics/unexpectedness.py +0 -0
  114. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/als.py +0 -0
  115. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/association_rules.py +0 -0
  116. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/base_neighbour_rec.py +0 -0
  117. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/cat_pop_rec.py +0 -0
  118. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/cluster.py +0 -0
  119. {replay_rec-0.18.0rc0/replay/experimental → replay_rec-0.18.1/replay/models/extensions}/__init__.py +0 -0
  120. {replay_rec-0.18.0rc0/replay/experimental/models/dt4rec → replay_rec-0.18.1/replay/models/extensions/ann}/__init__.py +0 -0
  121. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/ann_mixin.py +0 -0
  122. {replay_rec-0.18.0rc0/replay/experimental/models/extensions/spark_custom_models → replay_rec-0.18.1/replay/models/extensions/ann/entities}/__init__.py +0 -0
  123. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/entities/base_hnsw_param.py +0 -0
  124. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/entities/hnswlib_param.py +0 -0
  125. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/entities/nmslib_hnsw_param.py +0 -0
  126. {replay_rec-0.18.0rc0/replay/experimental/scenarios/two_stages → replay_rec-0.18.1/replay/models/extensions/ann/index_builders}/__init__.py +0 -0
  127. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/base_index_builder.py +0 -0
  128. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/driver_hnswlib_index_builder.py +0 -0
  129. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/driver_nmslib_index_builder.py +0 -0
  130. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/executor_hnswlib_index_builder.py +0 -0
  131. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/executor_nmslib_index_builder.py +0 -0
  132. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/nmslib_index_builder_mixin.py +0 -0
  133. {replay_rec-0.18.0rc0/replay/experimental/utils → replay_rec-0.18.1/replay/models/extensions/ann/index_inferers}/__init__.py +0 -0
  134. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/base_inferer.py +0 -0
  135. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/hnswlib_filter_index_inferer.py +0 -0
  136. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/hnswlib_index_inferer.py +0 -0
  137. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py +0 -0
  138. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py +0 -0
  139. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/utils.py +0 -0
  140. {replay_rec-0.18.0rc0/replay/models/extensions → replay_rec-0.18.1/replay/models/extensions/ann/index_stores}/__init__.py +0 -0
  141. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_stores/base_index_store.py +0 -0
  142. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_stores/hdfs_index_store.py +0 -0
  143. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_stores/shared_disk_index_store.py +0 -0
  144. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_stores/spark_files_index_store.py +0 -0
  145. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_stores/utils.py +0 -0
  146. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/extensions/ann/utils.py +0 -0
  147. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/kl_ucb.py +0 -0
  148. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/knn.py +0 -0
  149. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/__init__.py +0 -0
  150. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/optimizer_utils/__init__.py +0 -0
  151. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/optimizer_utils/optimizer_factory.py +0 -0
  152. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/__init__.py +0 -0
  153. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/bert4rec/__init__.py +0 -0
  154. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/callbacks/__init__.py +0 -0
  155. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/callbacks/validation_callback.py +0 -0
  156. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/postprocessors/__init__.py +0 -0
  157. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/postprocessors/_base.py +0 -0
  158. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/nn/sequential/sasrec/__init__.py +0 -0
  159. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/pop_rec.py +0 -0
  160. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/query_pop_rec.py +0 -0
  161. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/random_rec.py +0 -0
  162. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/slim.py +0 -0
  163. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/thompson_sampling.py +0 -0
  164. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/ucb.py +0 -0
  165. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/wilson.py +0 -0
  166. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/models/word2vec.py +0 -0
  167. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/optimization/__init__.py +0 -0
  168. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/optimization/optuna_objective.py +0 -0
  169. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/preprocessing/converter.py +0 -0
  170. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/preprocessing/filters.py +0 -0
  171. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/preprocessing/history_based_fp.py +0 -0
  172. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/preprocessing/sessionizer.py +0 -0
  173. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/scenarios/__init__.py +0 -0
  174. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/scenarios/fallback.py +0 -0
  175. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/splitters/__init__.py +0 -0
  176. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/splitters/base_splitter.py +0 -0
  177. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/splitters/k_folds.py +0 -0
  178. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/splitters/last_n_splitter.py +0 -0
  179. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/splitters/new_users_splitter.py +0 -0
  180. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/splitters/random_splitter.py +0 -0
  181. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/splitters/ratio_splitter.py +0 -0
  182. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/splitters/time_splitter.py +0 -0
  183. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/splitters/two_stage_splitter.py +0 -0
  184. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/utils/dataframe_bucketizer.py +0 -0
  185. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/utils/distributions.py +0 -0
  186. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/utils/model_handler.py +0 -0
  187. {replay_rec-0.18.0rc0 → replay_rec-0.18.1}/replay/utils/time.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: replay-rec
3
- Version: 0.18.0rc0
3
+ Version: 0.18.1
4
4
  Summary: RecSys Library
5
5
  Home-page: https://sb-ai-lab.github.io/RePlay/
6
6
  License: Apache-2.0
@@ -21,29 +21,24 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
21
  Provides-Extra: all
22
22
  Provides-Extra: spark
23
23
  Provides-Extra: torch
24
- Requires-Dist: d3rlpy (>=2.0.4,<3.0.0)
24
+ Provides-Extra: torch-openvino
25
25
  Requires-Dist: fixed-install-nmslib (==2.1.2)
26
- Requires-Dist: gym (>=0.26.0,<0.27.0)
27
26
  Requires-Dist: hnswlib (>=0.7.0,<0.8.0)
28
- Requires-Dist: implicit (>=0.7.0,<0.8.0)
29
- Requires-Dist: lightautoml (>=0.3.1,<0.4.0)
30
- Requires-Dist: lightfm (==1.17)
31
- Requires-Dist: lightning (>=2.0.2,<=2.4.0) ; extra == "torch" or extra == "all"
32
- Requires-Dist: llvmlite (>=0.32.1)
33
- Requires-Dist: numba (>=0.50)
27
+ Requires-Dist: lightning (>=2.0.2,<=2.4.0) ; extra == "torch" or extra == "torch-openvino" or extra == "all"
34
28
  Requires-Dist: numpy (>=1.20.0)
29
+ Requires-Dist: onnx (>=1.16.2,<1.17.0) ; extra == "torch-openvino" or extra == "all"
30
+ Requires-Dist: openvino (>=2024.3.0,<2024.4.0) ; extra == "torch-openvino" or extra == "all"
35
31
  Requires-Dist: optuna (>=3.2.0,<3.3.0)
36
32
  Requires-Dist: pandas (>=1.3.5,<=2.2.2)
37
33
  Requires-Dist: polars (>=1.0.0,<1.1.0)
38
34
  Requires-Dist: psutil (>=6.0.0,<6.1.0)
39
35
  Requires-Dist: pyarrow (>=12.0.1)
40
- Requires-Dist: pyspark (>=3.0,<3.5) ; (python_full_version >= "3.8.1" and python_version < "3.11") and (extra == "spark" or extra == "all")
41
- Requires-Dist: pyspark (>=3.4,<3.5) ; (python_version >= "3.11" and python_version < "3.12") and (extra == "spark" or extra == "all")
42
- Requires-Dist: pytorch-ranger (>=0.1.1,<0.2.0) ; extra == "torch" or extra == "all"
43
- Requires-Dist: sb-obp (>=0.5.8,<0.6.0)
36
+ Requires-Dist: pyspark (>=3.0,<3.6) ; (python_full_version >= "3.8.1" and python_version < "3.11") and (extra == "spark" or extra == "all")
37
+ Requires-Dist: pyspark (>=3.4,<3.6) ; (python_version >= "3.11" and python_version < "3.12") and (extra == "spark" or extra == "all")
38
+ Requires-Dist: pytorch-ranger (>=0.1.1,<0.2.0) ; extra == "torch" or extra == "torch-openvino" or extra == "all"
44
39
  Requires-Dist: scikit-learn (>=1.0.2,<2.0.0)
45
40
  Requires-Dist: scipy (>=1.8.1,<2.0.0)
46
- Requires-Dist: torch (>=1.8,<=2.4.0) ; extra == "torch" or extra == "all"
41
+ Requires-Dist: torch (>=1.8,<=2.5.0) ; extra == "torch" or extra == "torch-openvino" or extra == "all"
47
42
  Project-URL: Repository, https://github.com/sb-ai-lab/RePlay
48
43
  Description-Content-Type: text/markdown
49
44
 
@@ -52,11 +47,15 @@ Description-Content-Type: text/markdown
52
47
 
53
48
  [![GitHub License](https://img.shields.io/github/license/sb-ai-lab/RePlay)](https://github.com/sb-ai-lab/RePlay/blob/main/LICENSE)
54
49
  [![PyPI - Version](https://img.shields.io/pypi/v/replay-rec)](https://pypi.org/project/replay-rec)
50
+ [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://sb-ai-lab.github.io/RePlay/)
55
51
  [![PyPI - Downloads](https://img.shields.io/pypi/dm/replay-rec)](https://pypistats.org/packages/replay-rec)
56
52
  <br>
57
53
  [![GitHub Workflow Status (with event)](https://img.shields.io/github/actions/workflow/status/sb-ai-lab/replay/main.yml)](https://github.com/sb-ai-lab/RePlay/actions/workflows/main.yml?query=branch%3Amain)
54
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
55
+ [![Python Versions](https://img.shields.io/pypi/pyversions/replay-rec.svg?logo=python&logoColor=white)](https://pypi.org/project/replay-rec)
58
56
  [![Join the community on GitHub Discussions](https://badgen.net/badge/join%20the%20discussion/on%20github/black?icon=github)](https://github.com/sb-ai-lab/RePlay/discussions)
59
57
 
58
+
60
59
  RePlay is an advanced framework designed to facilitate the development and evaluation of recommendation systems. It provides a robust set of tools covering the entire lifecycle of a recommendation system pipeline:
61
60
 
62
61
  ## 🚀 Features:
@@ -71,61 +70,25 @@ RePlay is an advanced framework designed to facilitate the development and evalu
71
70
  1. **Diverse Hardware Support:** Compatible with various hardware configurations including CPU, GPU, Multi-GPU.
72
71
  2. **Cluster Computing Integration:** Integrating with PySpark for distributed computing, enabling scalability for large-scale recommendation systems.
73
72
 
74
- ## 📖 Documentation is available [here](https://sb-ai-lab.github.io/RePlay/).
75
-
76
73
  <a name="toc"></a>
77
74
  # Table of Contents
78
75
 
79
- * [Installation](#installation)
80
76
  * [Quickstart](#quickstart)
77
+ * [Installation](#installation)
81
78
  * [Resources](#examples)
82
79
  * [Contributing to RePlay](#contributing)
83
80
 
84
81
 
85
- <a name="installation"></a>
86
- ## 🔧 Installation
87
-
88
- Installation via `pip` package manager is recommended by default:
89
-
90
- ```bash
91
- pip install replay-rec
92
- ```
93
-
94
- In this case it will be installed the `core` package without `PySpark` and `PyTorch` dependencies.
95
- Also `experimental` submodule will not be installed.
96
-
97
- To install `experimental` submodule please specify the version with `rc0` suffix.
98
- For example:
99
-
100
- ```bash
101
- pip install replay-rec==XX.YY.ZZrc0
102
- ```
103
-
104
- ### Extras
105
-
106
- In addition to the core package, several extras are also provided, including:
107
- - `[spark]`: Install PySpark functionality
108
- - `[torch]`: Install PyTorch and Lightning functionality
109
- - `[all]`: `[spark]` `[torch]`
82
+ <a name="quickstart"></a>
83
+ ## 📈 Quickstart
110
84
 
111
- Example:
112
85
  ```bash
113
- # Install core package with PySpark dependency
114
- pip install replay-rec[spark]
115
-
116
- # Install package with experimental submodule and PySpark dependency
117
- pip install replay-rec[spark]==XX.YY.ZZrc0
86
+ pip install replay-rec[all]
118
87
  ```
119
88
 
120
- To build RePlay from sources please use the [instruction](CONTRIBUTING.md#installing-from-the-source).
121
-
122
- If you encounter an error during RePlay installation, check the [troubleshooting](https://sb-ai-lab.github.io/RePlay/pages/installation.html#troubleshooting) guide.
123
-
124
-
125
- <a name="quickstart"></a>
126
- ## 📈 Quickstart (PySpark-based)
127
-
89
+ PySpark-based model and [fast](https://github.com/sb-ai-lab/RePlay/blob/main/examples/11_sasrec_dataframes_comparison.ipynb) polars-based data preprocessing:
128
90
  ```python
91
+ from polars import from_pandas
129
92
  from rs_datasets import MovieLens
130
93
 
131
94
  from replay.data import Dataset, FeatureHint, FeatureInfo, FeatureSchema, FeatureType
@@ -139,10 +102,10 @@ from replay.splitters import RatioSplitter
139
102
  spark = State().session
140
103
 
141
104
  ml_1m = MovieLens("1m")
142
- K=10
105
+ K = 10
143
106
 
144
- # data preprocessing
145
- interactions = convert2spark(ml_1m.ratings)
107
+ # convert data to polars
108
+ interactions = from_pandas(ml_1m.ratings)
146
109
 
147
110
  # data splitting
148
111
  splitter = RatioSplitter(
@@ -156,7 +119,7 @@ splitter = RatioSplitter(
156
119
  )
157
120
  train, test = splitter.split(interactions)
158
121
 
159
- # dataset creating
122
+ # datasets creation
160
123
  feature_schema = FeatureSchema(
161
124
  [
162
125
  FeatureInfo(
@@ -182,20 +145,18 @@ feature_schema = FeatureSchema(
182
145
  ]
183
146
  )
184
147
 
185
- train_dataset = Dataset(
186
- feature_schema=feature_schema,
187
- interactions=train,
188
- )
189
- test_dataset = Dataset(
190
- feature_schema=feature_schema,
191
- interactions=test,
192
- )
148
+ train_dataset = Dataset(feature_schema=feature_schema, interactions=train)
149
+ test_dataset = Dataset(feature_schema=feature_schema, interactions=test)
193
150
 
194
151
  # data encoding
195
152
  encoder = DatasetLabelEncoder()
196
153
  train_dataset = encoder.fit_transform(train_dataset)
197
154
  test_dataset = encoder.transform(test_dataset)
198
155
 
156
+ # convert datasets to spark
157
+ train_dataset.to_spark()
158
+ test_dataset.to_spark()
159
+
199
160
  # model training
200
161
  model = ItemKNN()
201
162
  model.fit(train_dataset)
@@ -222,6 +183,44 @@ metrics.add_result("ItemKNN", recs)
222
183
  print(metrics.results)
223
184
  ```
224
185
 
186
+ <a name="installation"></a>
187
+ ## 🔧 Installation
188
+
189
+ Installation via `pip` package manager is recommended by default:
190
+
191
+ ```bash
192
+ pip install replay-rec
193
+ ```
194
+
195
+ In this case the `core` package will be installed without the `PySpark` and `PyTorch` dependencies.
196
+ The `experimental` submodule will also not be installed.
197
+
198
+ To install the `experimental` submodule, please specify a version with the `rc0` suffix.
199
+ For example:
200
+
201
+ ```bash
202
+ pip install replay-rec==XX.YY.ZZrc0
203
+ ```
204
+
205
+ ### Extras
206
+
207
+ In addition to the core package, several extras are also provided, including:
208
+ - `[spark]`: Install PySpark functionality
209
+ - `[torch]`: Install PyTorch and Lightning functionality
210
+ - `[all]`: `[spark]` `[torch]`
211
+
212
+ Example:
213
+ ```bash
214
+ # Install core package with PySpark dependency
215
+ pip install replay-rec[spark]
216
+
217
+ # Install package with experimental submodule and PySpark dependency
218
+ pip install replay-rec[spark]==XX.YY.ZZrc0
219
+ ```
220
+
221
+ To build RePlay from sources please use the [instruction](CONTRIBUTING.md#installing-from-the-source).
222
+
223
+
225
224
  <a name="examples"></a>
226
225
  ## 📑 Resources
227
226
 
@@ -234,14 +233,19 @@ print(metrics.results)
234
233
  6. [06_item2item_recommendations.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/06_item2item_recommendations.ipynb) - Item to Item recommendations example.
235
234
  7. [07_filters.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/07_filters.ipynb) - An example of using filters.
236
235
  8. [08_recommending_for_categories.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/08_recommending_for_categories.ipynb) - An example of recommendation for product categories.
237
- 9. [09_sasrec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/09_sasrec_example.ipynb) - An example of using transformers to generate recommendations.
238
-
236
+ 9. [09_sasrec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/09_sasrec_example.ipynb) - An example of using transformer-based SASRec model to generate recommendations.
237
+ 10. [10_bert4rec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/10_bert4rec_example.ipynb) - An example of using transformer-based BERT4Rec model to generate recommendations.
238
+ 11. [11_sasrec_dataframes_comparison.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/11_sasrec_dataframes_comparison.ipynb) - A speed comparison of different frameworks (pandas, polars, pyspark) for data processing during SASRec training.
239
+ 12. [12_neural_ts_exp.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/12_neural_ts_exp.ipynb) - An example of using Neural Thompson Sampling bandit model (based on Wide&Deep architecture).
240
+ 13. [13_personalized_bandit_comparison.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/13_personalized_bandit_comparison.ipynb) - A comparison of context-free and contextual bandit models.
241
+ 14. [14_hierarchical_recommender.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/14_hierarchical_recommender.ipynb) - An example of using HierarchicalRecommender with user-disjoint LinUCB.
239
242
 
240
243
  ### Videos and papers
241
244
  * **Video guides**:
242
245
  - [Replay for offline recommendations, AI Journey 2021](https://www.youtube.com/watch?v=ejQZKGAG0xs)
243
246
 
244
247
  * **Research papers**:
248
+ - [RePlay: a Recommendation Framework for Experimentation and Production Use](https://arxiv.org/abs/2409.07272) Alexey Vasilev, Anna Volodkevich, Denis Kulandin, Tatiana Bysheva, Anton Klenitskiy. In The 18th ACM Conference on Recommender Systems (RecSys '24)
245
249
  - [Turning Dross Into Gold Loss: is BERT4Rec really better than SASRec?](https://doi.org/10.1145/3604915.3610644) Anton Klenitskiy, Alexey Vasilev. In The 17th ACM Conference on Recommender Systems (RecSys '23)
246
250
  - [The Long Tail of Context: Does it Exist and Matter?](https://arxiv.org/abs/2210.01023). Konstantin Bauman, Alexey Vasilev, Alexander Tuzhilin. In Workshop on Context-Aware Recommender Systems (CARS) (RecSys '22)
247
251
  - [Multiobjective Evaluation of Reinforcement Learning Based Recommender Systems](https://doi.org/10.1145/3523227.3551485). Alexey Grishanov, Anastasia Ianina, Konstantin Vorontsov. In The 16th ACM Conference on Recommender Systems (RecSys '22)
@@ -252,3 +256,4 @@ print(metrics.results)
252
256
 
253
257
  We welcome community contributions. For details please check our [contributing guidelines](CONTRIBUTING.md).
254
258
 
259
+
@@ -3,11 +3,15 @@
3
3
 
4
4
  [![GitHub License](https://img.shields.io/github/license/sb-ai-lab/RePlay)](https://github.com/sb-ai-lab/RePlay/blob/main/LICENSE)
5
5
  [![PyPI - Version](https://img.shields.io/pypi/v/replay-rec)](https://pypi.org/project/replay-rec)
6
+ [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://sb-ai-lab.github.io/RePlay/)
6
7
  [![PyPI - Downloads](https://img.shields.io/pypi/dm/replay-rec)](https://pypistats.org/packages/replay-rec)
7
8
  <br>
8
9
  [![GitHub Workflow Status (with event)](https://img.shields.io/github/actions/workflow/status/sb-ai-lab/replay/main.yml)](https://github.com/sb-ai-lab/RePlay/actions/workflows/main.yml?query=branch%3Amain)
10
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
11
+ [![Python Versions](https://img.shields.io/pypi/pyversions/replay-rec.svg?logo=python&logoColor=white)](https://pypi.org/project/replay-rec)
9
12
  [![Join the community on GitHub Discussions](https://badgen.net/badge/join%20the%20discussion/on%20github/black?icon=github)](https://github.com/sb-ai-lab/RePlay/discussions)
10
13
 
14
+
11
15
  RePlay is an advanced framework designed to facilitate the development and evaluation of recommendation systems. It provides a robust set of tools covering the entire lifecycle of a recommendation system pipeline:
12
16
 
13
17
  ## 🚀 Features:
@@ -22,61 +26,25 @@ RePlay is an advanced framework designed to facilitate the development and evalu
22
26
  1. **Diverse Hardware Support:** Compatible with various hardware configurations including CPU, GPU, Multi-GPU.
23
27
  2. **Cluster Computing Integration:** Integrating with PySpark for distributed computing, enabling scalability for large-scale recommendation systems.
24
28
 
25
- ## 📖 Documentation is available [here](https://sb-ai-lab.github.io/RePlay/).
26
-
27
29
  <a name="toc"></a>
28
30
  # Table of Contents
29
31
 
30
- * [Installation](#installation)
31
32
  * [Quickstart](#quickstart)
33
+ * [Installation](#installation)
32
34
  * [Resources](#examples)
33
35
  * [Contributing to RePlay](#contributing)
34
36
 
35
37
 
36
- <a name="installation"></a>
37
- ## 🔧 Installation
38
-
39
- Installation via `pip` package manager is recommended by default:
40
-
41
- ```bash
42
- pip install replay-rec
43
- ```
44
-
45
- In this case it will be installed the `core` package without `PySpark` and `PyTorch` dependencies.
46
- Also `experimental` submodule will not be installed.
47
-
48
- To install `experimental` submodule please specify the version with `rc0` suffix.
49
- For example:
50
-
51
- ```bash
52
- pip install replay-rec==XX.YY.ZZrc0
53
- ```
54
-
55
- ### Extras
56
-
57
- In addition to the core package, several extras are also provided, including:
58
- - `[spark]`: Install PySpark functionality
59
- - `[torch]`: Install PyTorch and Lightning functionality
60
- - `[all]`: `[spark]` `[torch]`
38
+ <a name="quickstart"></a>
39
+ ## 📈 Quickstart
61
40
 
62
- Example:
63
41
  ```bash
64
- # Install core package with PySpark dependency
65
- pip install replay-rec[spark]
66
-
67
- # Install package with experimental submodule and PySpark dependency
68
- pip install replay-rec[spark]==XX.YY.ZZrc0
42
+ pip install replay-rec[all]
69
43
  ```
70
44
 
71
- To build RePlay from sources please use the [instruction](CONTRIBUTING.md#installing-from-the-source).
72
-
73
- If you encounter an error during RePlay installation, check the [troubleshooting](https://sb-ai-lab.github.io/RePlay/pages/installation.html#troubleshooting) guide.
74
-
75
-
76
- <a name="quickstart"></a>
77
- ## 📈 Quickstart (PySpark-based)
78
-
45
+ PySpark-based model and [fast](https://github.com/sb-ai-lab/RePlay/blob/main/examples/11_sasrec_dataframes_comparison.ipynb) polars-based data preprocessing:
79
46
  ```python
47
+ from polars import from_pandas
80
48
  from rs_datasets import MovieLens
81
49
 
82
50
  from replay.data import Dataset, FeatureHint, FeatureInfo, FeatureSchema, FeatureType
@@ -90,10 +58,10 @@ from replay.splitters import RatioSplitter
90
58
  spark = State().session
91
59
 
92
60
  ml_1m = MovieLens("1m")
93
- K=10
61
+ K = 10
94
62
 
95
- # data preprocessing
96
- interactions = convert2spark(ml_1m.ratings)
63
+ # convert data to polars
64
+ interactions = from_pandas(ml_1m.ratings)
97
65
 
98
66
  # data splitting
99
67
  splitter = RatioSplitter(
@@ -107,7 +75,7 @@ splitter = RatioSplitter(
107
75
  )
108
76
  train, test = splitter.split(interactions)
109
77
 
110
- # dataset creating
78
+ # datasets creation
111
79
  feature_schema = FeatureSchema(
112
80
  [
113
81
  FeatureInfo(
@@ -133,20 +101,18 @@ feature_schema = FeatureSchema(
133
101
  ]
134
102
  )
135
103
 
136
- train_dataset = Dataset(
137
- feature_schema=feature_schema,
138
- interactions=train,
139
- )
140
- test_dataset = Dataset(
141
- feature_schema=feature_schema,
142
- interactions=test,
143
- )
104
+ train_dataset = Dataset(feature_schema=feature_schema, interactions=train)
105
+ test_dataset = Dataset(feature_schema=feature_schema, interactions=test)
144
106
 
145
107
  # data encoding
146
108
  encoder = DatasetLabelEncoder()
147
109
  train_dataset = encoder.fit_transform(train_dataset)
148
110
  test_dataset = encoder.transform(test_dataset)
149
111
 
112
+ # convert datasets to spark
113
+ train_dataset.to_spark()
114
+ test_dataset.to_spark()
115
+
150
116
  # model training
151
117
  model = ItemKNN()
152
118
  model.fit(train_dataset)
@@ -173,6 +139,44 @@ metrics.add_result("ItemKNN", recs)
173
139
  print(metrics.results)
174
140
  ```
175
141
 
142
+ <a name="installation"></a>
143
+ ## 🔧 Installation
144
+
145
+ Installation via `pip` package manager is recommended by default:
146
+
147
+ ```bash
148
+ pip install replay-rec
149
+ ```
150
+
151
+ In this case the `core` package will be installed without `PySpark` and `PyTorch` dependencies.
152
+ Also, the `experimental` submodule will not be installed.
153
+
154
+ To install `experimental` submodule please specify the version with `rc0` suffix.
155
+ For example:
156
+
157
+ ```bash
158
+ pip install replay-rec==XX.YY.ZZrc0
159
+ ```
160
+
161
+ ### Extras
162
+
163
+ In addition to the core package, several extras are also provided, including:
164
+ - `[spark]`: Install PySpark functionality
165
+ - `[torch]`: Install PyTorch and Lightning functionality
166
+ - `[all]`: `[spark]` `[torch]`
167
+
168
+ Example:
169
+ ```bash
170
+ # Install core package with PySpark dependency
171
+ pip install replay-rec[spark]
172
+
173
+ # Install package with experimental submodule and PySpark dependency
174
+ pip install replay-rec[spark]==XX.YY.ZZrc0
175
+ ```
176
+
177
+ To build RePlay from sources please use the [instruction](CONTRIBUTING.md#installing-from-the-source).
178
+
179
+
176
180
  <a name="examples"></a>
177
181
  ## 📑 Resources
178
182
 
@@ -185,14 +189,19 @@ print(metrics.results)
185
189
  6. [06_item2item_recommendations.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/06_item2item_recommendations.ipynb) - Item to Item recommendations example.
186
190
  7. [07_filters.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/07_filters.ipynb) - An example of using filters.
187
191
  8. [08_recommending_for_categories.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/08_recommending_for_categories.ipynb) - An example of recommendation for product categories.
188
- 9. [09_sasrec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/09_sasrec_example.ipynb) - An example of using transformers to generate recommendations.
189
-
192
+ 9. [09_sasrec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/09_sasrec_example.ipynb) - An example of using transformer-based SASRec model to generate recommendations.
193
+ 10. [10_bert4rec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/10_bert4rec_example.ipynb) - An example of using transformer-based BERT4Rec model to generate recommendations.
194
+ 11. [11_sasrec_dataframes_comparison.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/11_sasrec_dataframes_comparison.ipynb) - A speed comparison of using different frameworks (pandas, polars, pyspark) for data processing during SASRec training.
195
+ 12. [12_neural_ts_exp.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/12_neural_ts_exp.ipynb) - An example of using Neural Thompson Sampling bandit model (based on Wide&Deep architecture).
196
+ 13. [13_personalized_bandit_comparison.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/13_personalized_bandit_comparison.ipynb) - A comparison of context-free and contextual bandit models.
197
+ 14. [14_hierarchical_recommender.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/14_hierarchical_recommender.ipynb) - An example of using HierarchicalRecommender with user-disjoint LinUCB.
190
198
 
191
199
  ### Videos and papers
192
200
  * **Video guides**:
193
201
  - [Replay for offline recommendations, AI Journey 2021](https://www.youtube.com/watch?v=ejQZKGAG0xs)
194
202
 
195
203
  * **Research papers**:
204
+ - [RePlay: a Recommendation Framework for Experimentation and Production Use](https://arxiv.org/abs/2409.07272). Alexey Vasilev, Anna Volodkevich, Denis Kulandin, Tatiana Bysheva, Anton Klenitskiy. In The 18th ACM Conference on Recommender Systems (RecSys '24)
196
205
  - [Turning Dross Into Gold Loss: is BERT4Rec really better than SASRec?](https://doi.org/10.1145/3604915.3610644) Anton Klenitskiy, Alexey Vasilev. In The 17th ACM Conference on Recommender Systems (RecSys '23)
197
206
  - [The Long Tail of Context: Does it Exist and Matter?](https://arxiv.org/abs/2210.01023). Konstantin Bauman, Alexey Vasilev, Alexander Tuzhilin. In Workshop on Context-Aware Recommender Systems (CARS) (RecSys '22)
198
207
  - [Multiobjective Evaluation of Reinforcement Learning Based Recommender Systems](https://doi.org/10.1145/3523227.3551485). Alexey Grishanov, Anastasia Ianina, Konstantin Vorontsov. In The 16th ACM Conference on Recommender Systems (RecSys '22)
@@ -202,3 +211,4 @@ print(metrics.results)
202
211
  ## 💡 Contributing to RePlay
203
212
 
204
213
  We welcome community contributions. For details please check our [contributing guidelines](CONTRIBUTING.md).
214
+
@@ -39,8 +39,9 @@ classifiers = [
39
39
  ]
40
40
  exclude = [
41
41
  "replay/conftest.py",
42
+ "replay/experimental",
42
43
  ]
43
- version = "0.18.0.preview"
44
+ version = "0.18.1"
44
45
 
45
46
  [tool.poetry.dependencies]
46
47
  python = ">=3.8.1, <3.12"
@@ -52,28 +53,23 @@ scipy = "^1.8.1"
52
53
  psutil = "~6.0.0"
53
54
  scikit-learn = "^1.0.2"
54
55
  pyarrow = ">=12.0.1"
55
- fixed-install-nmslib = "2.1.2"
56
- hnswlib = "^0.7.0"
56
+ openvino = {version = "~2024.3.0", optional = true}
57
+ onnx = {version = "~1.16.2", optional = true}
57
58
  pyspark = [
58
- {version = ">=3.4,<3.5", python = ">=3.11,<3.12"},
59
- {version = ">=3.0,<3.5", python = ">=3.8.1,<3.11"},
59
+ {version = ">=3.4,<3.6", python = ">=3.11,<3.12", optional = true},
60
+ {version = ">=3.0,<3.6", python = ">=3.8.1,<3.11", optional = true},
60
61
  ]
61
- torch = ">=1.8, <=2.4.0"
62
- lightning = ">=2.0.2, <=2.4.0"
63
- pytorch-ranger = "^0.1.1"
64
- lightfm = "1.17"
65
- lightautoml = "~0.3.1"
66
- numba = ">=0.50"
67
- llvmlite = ">=0.32.1"
68
- sb-obp = "^0.5.8"
69
- d3rlpy = "^2.0.4"
70
- implicit = "~0.7.0"
71
- gym = "^0.26.0"
62
+ torch = {version = ">=1.8, <=2.5.0", optional = true}
63
+ lightning = {version = ">=2.0.2, <=2.4.0", optional = true}
64
+ pytorch-ranger = {version = "^0.1.1", optional = true}
65
+ fixed-install-nmslib = "2.1.2"
66
+ hnswlib = "^0.7.0"
72
67
 
73
68
  [tool.poetry.extras]
74
69
  spark = ["pyspark"]
75
70
  torch = ["torch", "pytorch-ranger", "lightning"]
76
- all = ["pyspark", "torch", "pytorch-ranger", "lightning"]
71
+ torch-openvino = ["torch", "pytorch-ranger", "lightning", "openvino", "onnx"]
72
+ all = ["pyspark", "torch", "pytorch-ranger", "lightning", "openvino", "onnx"]
77
73
 
78
74
  [tool.poetry.group.dev.dependencies]
79
75
  jupyter = "~1.0.0"
@@ -92,10 +88,11 @@ myst-parser = "1.0.0"
92
88
  ghp-import = "2.1.0"
93
89
  docutils = "0.16"
94
90
  data-science-types = "0.2.23"
91
+ filelock = "~3.14.0"
95
92
 
96
93
  [tool.poetry-dynamic-versioning]
97
94
  enable = false
98
- format-jinja = """0.18.0{{ env['PACKAGE_SUFFIX'] }}"""
95
+ format-jinja = """0.18.1{{ env['PACKAGE_SUFFIX'] }}"""
99
96
  vcs = "git"
100
97
 
101
98
  [tool.ruff]
@@ -0,0 +1,3 @@
1
+ """ RecSys library """
2
+
3
+ __version__ = "0.18.1"
@@ -458,13 +458,23 @@ class Dataset:
458
458
  if feature.feature_hint in [FeatureHint.ITEM_ID, FeatureHint.QUERY_ID]:
459
459
  return nunique(self._ids_feature_map[feature.feature_hint], column)
460
460
  assert feature.feature_source
461
+ if feature.feature_type == FeatureType.CATEGORICAL_LIST:
462
+ if self.is_spark:
463
+ data = (
464
+ self._feature_source_map[feature.feature_source]
465
+ .select(column)
466
+ .withColumn(column, sf.explode(column))
467
+ )
468
+ else:
469
+ data = self._feature_source_map[feature.feature_source][[column]].explode(column)
470
+ return nunique(data, column)
461
471
  return nunique(self._feature_source_map[feature.feature_source], column)
462
472
 
463
473
  return callback
464
474
 
465
475
  def _set_cardinality(self, features_list: Sequence[FeatureInfo]) -> None:
466
476
  for feature in features_list:
467
- if feature.feature_type == FeatureType.CATEGORICAL:
477
+ if feature.feature_type in [FeatureType.CATEGORICAL, FeatureType.CATEGORICAL_LIST]:
468
478
  feature._set_cardinality_callback(self._get_cardinality(feature))
469
479
 
470
480
  def _fill_feature_schema(self, feature_schema: FeatureSchema) -> FeatureSchema:
@@ -581,6 +591,7 @@ class Dataset:
581
591
  data: DataFrameLike,
582
592
  column: str,
583
593
  source: FeatureSource,
594
+ feature_type: FeatureType,
584
595
  cardinality: Optional[int],
585
596
  ) -> None:
586
597
  """
@@ -593,6 +604,16 @@ class Dataset:
593
604
  Option: Keep this criterion, but suggest the user to disable the check if he understands
594
605
  that the criterion will not pass.
595
606
  """
607
+ if feature_type == FeatureType.CATEGORICAL_LIST: # explode column if list
608
+ data = data.withColumn(column, sf.explode(column)) if self.is_spark else data[[column]].explode(column)
609
+
610
+ if self.is_pandas:
611
+ try:
612
+ data[column] = data[column].astype(int)
613
+ except Exception:
614
+ msg = f"IDs in {source.name}.{column} are not encoded. They are not int."
615
+ raise ValueError(msg)
616
+
596
617
  if self.is_pandas:
597
618
  is_int = np.issubdtype(dict(data.dtypes)[column], int)
598
619
  elif self.is_spark:
@@ -632,6 +653,7 @@ class Dataset:
632
653
  self.interactions,
633
654
  feature.column,
634
655
  FeatureSource.INTERACTIONS,
656
+ feature.feature_type,
635
657
  feature.cardinality,
636
658
  )
637
659
  if self.item_features is not None:
@@ -639,6 +661,7 @@ class Dataset:
639
661
  self.item_features,
640
662
  feature.column,
641
663
  FeatureSource.ITEM_FEATURES,
664
+ feature.feature_type,
642
665
  feature.cardinality,
643
666
  )
644
667
  elif feature.feature_hint == FeatureHint.QUERY_ID:
@@ -646,6 +669,7 @@ class Dataset:
646
669
  self.interactions,
647
670
  feature.column,
648
671
  FeatureSource.INTERACTIONS,
672
+ feature.feature_type,
649
673
  feature.cardinality,
650
674
  )
651
675
  if self.query_features is not None:
@@ -653,6 +677,7 @@ class Dataset:
653
677
  self.query_features,
654
678
  feature.column,
655
679
  FeatureSource.QUERY_FEATURES,
680
+ feature.feature_type,
656
681
  feature.cardinality,
657
682
  )
658
683
  else:
@@ -661,6 +686,7 @@ class Dataset:
661
686
  data,
662
687
  feature.column,
663
688
  feature.feature_source,
689
+ feature.feature_type,
664
690
  feature.cardinality,
665
691
  )
666
692
 
@@ -8,8 +8,8 @@ Contains classes for encoding categorical data
8
8
  import warnings
9
9
  from typing import Dict, Iterable, Iterator, Optional, Sequence, Set, Union
10
10
 
11
- from replay.data import Dataset, FeatureHint, FeatureSchema, FeatureSource
12
- from replay.preprocessing import LabelEncoder, LabelEncodingRule
11
+ from replay.data import Dataset, FeatureHint, FeatureSchema, FeatureSource, FeatureType
12
+ from replay.preprocessing import LabelEncoder, LabelEncodingRule, SequenceEncodingRule
13
13
  from replay.preprocessing.label_encoder import HandleUnknownStrategies
14
14
 
15
15
 
@@ -62,7 +62,10 @@ class DatasetLabelEncoder:
62
62
 
63
63
  self._fill_features_columns(dataset.feature_schema)
64
64
  for column, feature_info in dataset.feature_schema.categorical_features.items():
65
- encoding_rule = LabelEncodingRule(
65
+ encoding_rule_class = (
66
+ SequenceEncodingRule if feature_info.feature_type == FeatureType.CATEGORICAL_LIST else LabelEncodingRule
67
+ )
68
+ encoding_rule = encoding_rule_class(
66
69
  column, handle_unknown=self._handle_unknown_rule, default_value=self._default_value_rule
67
70
  )
68
71
  if feature_info.feature_hint == FeatureHint.QUERY_ID: