replay-rec 0.20.0__tar.gz → 0.20.0rc0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/PKG-INFO +17 -17
  2. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/README.md +6 -0
  3. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/pyproject.toml +11 -23
  4. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/__init__.py +1 -1
  5. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/dataset.py +10 -9
  6. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/dataset_utils/dataset_label_encoder.py +5 -4
  7. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/schema.py +9 -18
  8. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/sequence_tokenizer.py +16 -15
  9. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/sequential_dataset.py +4 -4
  10. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/torch_sequential_dataset.py +5 -4
  11. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/utils.py +2 -1
  12. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/schema.py +3 -12
  13. replay_rec-0.20.0rc0/replay/experimental/metrics/__init__.py +62 -0
  14. replay_rec-0.20.0rc0/replay/experimental/metrics/base_metric.py +603 -0
  15. replay_rec-0.20.0rc0/replay/experimental/metrics/coverage.py +97 -0
  16. replay_rec-0.20.0rc0/replay/experimental/metrics/experiment.py +175 -0
  17. replay_rec-0.20.0rc0/replay/experimental/metrics/hitrate.py +26 -0
  18. replay_rec-0.20.0rc0/replay/experimental/metrics/map.py +30 -0
  19. replay_rec-0.20.0rc0/replay/experimental/metrics/mrr.py +18 -0
  20. replay_rec-0.20.0rc0/replay/experimental/metrics/ncis_precision.py +31 -0
  21. replay_rec-0.20.0rc0/replay/experimental/metrics/ndcg.py +49 -0
  22. replay_rec-0.20.0rc0/replay/experimental/metrics/precision.py +22 -0
  23. replay_rec-0.20.0rc0/replay/experimental/metrics/recall.py +25 -0
  24. replay_rec-0.20.0rc0/replay/experimental/metrics/rocauc.py +49 -0
  25. replay_rec-0.20.0rc0/replay/experimental/metrics/surprisal.py +90 -0
  26. replay_rec-0.20.0rc0/replay/experimental/metrics/unexpectedness.py +76 -0
  27. replay_rec-0.20.0rc0/replay/experimental/models/__init__.py +50 -0
  28. replay_rec-0.20.0rc0/replay/experimental/models/admm_slim.py +257 -0
  29. replay_rec-0.20.0rc0/replay/experimental/models/base_neighbour_rec.py +200 -0
  30. replay_rec-0.20.0rc0/replay/experimental/models/base_rec.py +1386 -0
  31. replay_rec-0.20.0rc0/replay/experimental/models/base_torch_rec.py +234 -0
  32. replay_rec-0.20.0rc0/replay/experimental/models/cql.py +454 -0
  33. replay_rec-0.20.0rc0/replay/experimental/models/ddpg.py +932 -0
  34. replay_rec-0.20.0rc0/replay/experimental/models/dt4rec/dt4rec.py +189 -0
  35. replay_rec-0.20.0rc0/replay/experimental/models/dt4rec/gpt1.py +401 -0
  36. replay_rec-0.20.0rc0/replay/experimental/models/dt4rec/trainer.py +127 -0
  37. replay_rec-0.20.0rc0/replay/experimental/models/dt4rec/utils.py +264 -0
  38. replay_rec-0.20.0rc0/replay/experimental/models/extensions/spark_custom_models/als_extension.py +792 -0
  39. replay_rec-0.20.0rc0/replay/experimental/models/hierarchical_recommender.py +331 -0
  40. replay_rec-0.20.0rc0/replay/experimental/models/implicit_wrap.py +131 -0
  41. replay_rec-0.20.0rc0/replay/experimental/models/lightfm_wrap.py +303 -0
  42. replay_rec-0.20.0rc0/replay/experimental/models/mult_vae.py +332 -0
  43. replay_rec-0.20.0rc0/replay/experimental/models/neural_ts.py +986 -0
  44. replay_rec-0.20.0rc0/replay/experimental/models/neuromf.py +406 -0
  45. replay_rec-0.20.0rc0/replay/experimental/models/scala_als.py +293 -0
  46. replay_rec-0.20.0rc0/replay/experimental/models/u_lin_ucb.py +115 -0
  47. replay_rec-0.20.0rc0/replay/experimental/nn/data/__init__.py +1 -0
  48. replay_rec-0.20.0rc0/replay/experimental/nn/data/schema_builder.py +102 -0
  49. replay_rec-0.20.0rc0/replay/experimental/preprocessing/__init__.py +3 -0
  50. replay_rec-0.20.0rc0/replay/experimental/preprocessing/data_preparator.py +839 -0
  51. replay_rec-0.20.0rc0/replay/experimental/preprocessing/padder.py +229 -0
  52. replay_rec-0.20.0rc0/replay/experimental/preprocessing/sequence_generator.py +208 -0
  53. replay_rec-0.20.0rc0/replay/experimental/scenarios/__init__.py +1 -0
  54. replay_rec-0.20.0rc0/replay/experimental/scenarios/obp_wrapper/__init__.py +8 -0
  55. replay_rec-0.20.0rc0/replay/experimental/scenarios/obp_wrapper/obp_optuna_objective.py +74 -0
  56. replay_rec-0.20.0rc0/replay/experimental/scenarios/obp_wrapper/replay_offline.py +261 -0
  57. replay_rec-0.20.0rc0/replay/experimental/scenarios/obp_wrapper/utils.py +85 -0
  58. replay_rec-0.20.0rc0/replay/experimental/scenarios/two_stages/reranker.py +117 -0
  59. replay_rec-0.20.0rc0/replay/experimental/scenarios/two_stages/two_stages_scenario.py +757 -0
  60. replay_rec-0.20.0rc0/replay/experimental/utils/logger.py +24 -0
  61. replay_rec-0.20.0rc0/replay/experimental/utils/model_handler.py +186 -0
  62. replay_rec-0.20.0rc0/replay/experimental/utils/session_handler.py +44 -0
  63. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/base_metric.py +11 -10
  64. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/categorical_diversity.py +8 -8
  65. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/coverage.py +4 -4
  66. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/experiment.py +3 -3
  67. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/hitrate.py +1 -3
  68. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/map.py +1 -3
  69. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/mrr.py +1 -3
  70. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/ndcg.py +1 -2
  71. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/novelty.py +3 -3
  72. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/offline_metrics.py +16 -16
  73. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/precision.py +1 -3
  74. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/recall.py +1 -3
  75. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/rocauc.py +1 -3
  76. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/surprisal.py +4 -4
  77. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/torch_metrics_builder.py +13 -12
  78. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/unexpectedness.py +2 -2
  79. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/als.py +2 -2
  80. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/association_rules.py +4 -3
  81. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/base_neighbour_rec.py +3 -2
  82. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/base_rec.py +11 -10
  83. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/cat_pop_rec.py +2 -1
  84. replay_rec-0.20.0rc0/replay/models/extensions/ann/__init__.py +0 -0
  85. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/ann_mixin.py +2 -1
  86. replay_rec-0.20.0rc0/replay/models/extensions/ann/entities/__init__.py +0 -0
  87. replay_rec-0.20.0rc0/replay/models/extensions/ann/index_builders/__init__.py +0 -0
  88. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_builders/executor_hnswlib_index_builder.py +2 -1
  89. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_builders/executor_nmslib_index_builder.py +2 -1
  90. replay_rec-0.20.0rc0/replay/models/extensions/ann/index_inferers/__init__.py +0 -0
  91. replay_rec-0.20.0rc0/replay/models/extensions/ann/index_stores/__init__.py +0 -0
  92. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/lin_ucb.py +3 -3
  93. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/optimizer_utils/optimizer_factory.py +2 -2
  94. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/bert4rec/dataset.py +2 -2
  95. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/bert4rec/lightning.py +3 -3
  96. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/bert4rec/model.py +2 -2
  97. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/callbacks/prediction_callbacks.py +12 -12
  98. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/callbacks/validation_callback.py +9 -9
  99. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/compiled/base_compiled_model.py +5 -5
  100. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/postprocessors/_base.py +2 -3
  101. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/postprocessors/postprocessors.py +10 -10
  102. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/sasrec/lightning.py +3 -3
  103. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/sasrec/model.py +8 -8
  104. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/slim.py +2 -2
  105. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/ucb.py +2 -2
  106. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/word2vec.py +3 -3
  107. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/discretizer.py +8 -7
  108. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/filters.py +4 -4
  109. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/history_based_fp.py +6 -6
  110. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/label_encoder.py +8 -7
  111. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/scenarios/fallback.py +4 -3
  112. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/base_splitter.py +3 -3
  113. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/cold_user_random_splitter.py +4 -4
  114. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/k_folds.py +4 -4
  115. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/last_n_splitter.py +10 -10
  116. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/new_users_splitter.py +4 -4
  117. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/random_splitter.py +4 -4
  118. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/ratio_splitter.py +10 -10
  119. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/time_splitter.py +6 -6
  120. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/two_stage_splitter.py +4 -4
  121. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/__init__.py +1 -0
  122. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/common.py +1 -1
  123. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/session_handler.py +2 -2
  124. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/spark_utils.py +6 -5
  125. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/types.py +3 -1
  126. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/LICENSE +0 -0
  127. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/NOTICE +0 -0
  128. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/__init__.py +0 -0
  129. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/dataset_utils/__init__.py +0 -0
  130. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/nn/__init__.py +0 -0
  131. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/data/spark_schema.py +0 -0
  132. {replay_rec-0.20.0/replay/models/extensions → replay_rec-0.20.0rc0/replay/experimental}/__init__.py +0 -0
  133. {replay_rec-0.20.0/replay/models/extensions/ann → replay_rec-0.20.0rc0/replay/experimental/models/dt4rec}/__init__.py +0 -0
  134. {replay_rec-0.20.0/replay/models/extensions/ann/entities → replay_rec-0.20.0rc0/replay/experimental/models/extensions/spark_custom_models}/__init__.py +0 -0
  135. {replay_rec-0.20.0/replay/models/extensions/ann/index_builders → replay_rec-0.20.0rc0/replay/experimental/scenarios/two_stages}/__init__.py +0 -0
  136. {replay_rec-0.20.0/replay/models/extensions/ann/index_inferers → replay_rec-0.20.0rc0/replay/experimental/utils}/__init__.py +0 -0
  137. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/__init__.py +0 -0
  138. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/metrics/descriptors.py +0 -0
  139. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/__init__.py +0 -0
  140. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/cluster.py +0 -0
  141. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/common.py +0 -0
  142. {replay_rec-0.20.0/replay/models/extensions/ann/index_stores → replay_rec-0.20.0rc0/replay/models/extensions}/__init__.py +0 -0
  143. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/entities/base_hnsw_param.py +0 -0
  144. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/entities/hnswlib_param.py +0 -0
  145. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/entities/nmslib_hnsw_param.py +0 -0
  146. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_builders/base_index_builder.py +0 -0
  147. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_builders/driver_hnswlib_index_builder.py +0 -0
  148. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_builders/driver_nmslib_index_builder.py +0 -0
  149. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_builders/nmslib_index_builder_mixin.py +0 -0
  150. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_inferers/base_inferer.py +0 -0
  151. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_inferers/hnswlib_filter_index_inferer.py +0 -0
  152. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_inferers/hnswlib_index_inferer.py +0 -0
  153. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py +0 -0
  154. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py +0 -0
  155. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_inferers/utils.py +0 -0
  156. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_stores/base_index_store.py +0 -0
  157. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_stores/hdfs_index_store.py +0 -0
  158. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_stores/shared_disk_index_store.py +0 -0
  159. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_stores/spark_files_index_store.py +0 -0
  160. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/index_stores/utils.py +0 -0
  161. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/extensions/ann/utils.py +0 -0
  162. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/kl_ucb.py +0 -0
  163. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/knn.py +0 -0
  164. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/__init__.py +0 -0
  165. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/loss/__init__.py +0 -0
  166. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/loss/sce.py +0 -0
  167. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/optimizer_utils/__init__.py +0 -0
  168. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/__init__.py +0 -0
  169. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/bert4rec/__init__.py +0 -0
  170. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/callbacks/__init__.py +0 -0
  171. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/compiled/__init__.py +0 -0
  172. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/compiled/bert4rec_compiled.py +0 -0
  173. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/compiled/sasrec_compiled.py +0 -0
  174. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/postprocessors/__init__.py +0 -0
  175. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/sasrec/__init__.py +0 -0
  176. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/nn/sequential/sasrec/dataset.py +0 -0
  177. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/optimization/__init__.py +0 -0
  178. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/optimization/optuna_mixin.py +0 -0
  179. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/optimization/optuna_objective.py +0 -0
  180. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/pop_rec.py +0 -0
  181. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/query_pop_rec.py +0 -0
  182. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/random_rec.py +0 -0
  183. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/thompson_sampling.py +0 -0
  184. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/models/wilson.py +0 -0
  185. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/__init__.py +0 -0
  186. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/converter.py +0 -0
  187. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/preprocessing/sessionizer.py +0 -0
  188. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/scenarios/__init__.py +0 -0
  189. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/splitters/__init__.py +0 -0
  190. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/dataframe_bucketizer.py +0 -0
  191. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/distributions.py +0 -0
  192. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/model_handler.py +0 -0
  193. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/time.py +0 -0
  194. {replay_rec-0.20.0 → replay_rec-0.20.0rc0}/replay/utils/warnings.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: replay-rec
- Version: 0.20.0
+ Version: 0.20.0rc0
  Summary: RecSys Library
  License-Expression: Apache-2.0
  License-File: LICENSE
@@ -14,29 +14,23 @@ Classifier: Intended Audience :: Developers
  Classifier: Intended Audience :: Science/Research
  Classifier: Natural Language :: English
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
- Provides-Extra: spark
- Provides-Extra: torch
- Provides-Extra: torch-cpu
- Requires-Dist: lightning (<2.6.0) ; extra == "torch" or extra == "torch-cpu"
- Requires-Dist: lightning ; extra == "torch"
- Requires-Dist: lightning ; extra == "torch-cpu"
+ Requires-Dist: d3rlpy (>=2.8.1,<2.9)
+ Requires-Dist: implicit (>=0.7.2,<0.8)
+ Requires-Dist: lightautoml (>=0.4.1,<0.5)
+ Requires-Dist: lightning (>=2.0.2,<=2.4.0)
+ Requires-Dist: numba (>=0.50,<1)
  Requires-Dist: numpy (>=1.20.0,<2)
  Requires-Dist: pandas (>=1.3.5,<2.4.0)
  Requires-Dist: polars (<2.0)
- Requires-Dist: psutil (<=7.0.0) ; extra == "spark"
- Requires-Dist: psutil ; extra == "spark"
+ Requires-Dist: psutil (<=7.0.0)
  Requires-Dist: pyarrow (<22.0)
- Requires-Dist: pyspark (>=3.0,<3.5) ; extra == "spark"
- Requires-Dist: pyspark ; extra == "spark"
- Requires-Dist: pytorch-optimizer (>=3.8.0,<3.9.0) ; extra == "torch" or extra == "torch-cpu"
- Requires-Dist: pytorch-optimizer ; extra == "torch"
- Requires-Dist: pytorch-optimizer ; extra == "torch-cpu"
+ Requires-Dist: pyspark (>=3.0,<3.5)
+ Requires-Dist: pytorch-optimizer (>=3.8.0,<4)
+ Requires-Dist: sb-obp (>=0.5.10,<0.6)
  Requires-Dist: scikit-learn (>=1.6.1,<1.7.0)
  Requires-Dist: scipy (>=1.13.1,<1.14)
  Requires-Dist: setuptools
- Requires-Dist: torch (>=1.8,<3.0.0) ; extra == "torch" or extra == "torch-cpu"
- Requires-Dist: torch ; extra == "torch"
- Requires-Dist: torch ; extra == "torch-cpu"
+ Requires-Dist: torch (>=1.8,<3.0.0)
  Requires-Dist: tqdm (>=4.67,<5)
  Project-URL: Homepage, https://sb-ai-lab.github.io/RePlay/
  Project-URL: Repository, https://github.com/sb-ai-lab/RePlay
@@ -245,6 +239,12 @@ pip install openvino onnx
  pip install hnswlib fixed-install-nmslib
  ```

+ 4) (Experimental) LightFM model support:
+ ```bash
+ pip install lightfm
+ ```
+ > **_NOTE_** : LightFM is not officially supported for Python 3.12 due to discontinued maintenance of the library. If you wish to install it locally, you'll have to use a patched fork of LightFM, such as the [one used internally](https://github.com/daviddavo/lightfm).
+

  <a name="examples"></a>
  ## 📑 Resources
@@ -201,6 +201,12 @@ pip install openvino onnx
  pip install hnswlib fixed-install-nmslib
  ```

+ 4) (Experimental) LightFM model support:
+ ```bash
+ pip install lightfm
+ ```
+ > **_NOTE_** : LightFM is not officially supported for Python 3.12 due to discontinued maintenance of the library. If you wish to install it locally, you'll have to use a patched fork of LightFM, such as the [one used internally](https://github.com/daviddavo/lightfm).
+

  <a name="examples"></a>
  ## 📑 Resources
@@ -40,19 +40,19 @@ dependencies = [
  "scikit-learn (>=1.6.1,<1.7.0)",
  "pyarrow (<22.0)",
  "tqdm (>=4.67,<5)",
- "pyspark (>=3.0,<3.5); extra == 'spark'",
- "psutil (<=7.0.0); extra == 'spark'",
- "torch (>=1.8, <3.0.0); extra == 'torch' or extra == 'torch-cpu'",
- "pytorch-optimizer (>=3.8.0,<3.9.0); extra == 'torch' or extra == 'torch-cpu'",
- "lightning (<2.6.0); extra == 'torch' or extra == 'torch-cpu'",
+ "torch (>=1.8,<3.0.0)",
+ "lightning (>=2.0.2,<=2.4.0)",
+ "pytorch-optimizer (>=3.8.0,<4)",
+ "lightautoml (>=0.4.1,<0.5)",
+ "numba (>=0.50,<1)",
+ "sb-obp (>=0.5.10,<0.6)",
+ "d3rlpy (>=2.8.1,<2.9)",
+ "implicit (>=0.7.2,<0.8)",
+ "pyspark (>=3.0,<3.5)",
+ "psutil (<=7.0.0)",
  ]
  dynamic = ["dependencies"]
- version = "0.20.0"
-
- [project.optional-dependencies]
- spark = ["pyspark", "psutil"]
- torch = ["torch", "pytorch-optimizer", "lightning"]
- torch-cpu = ["torch", "pytorch-optimizer", "lightning"]
+ version = "0.20.0.preview"

  [project.urls]
  homepage = "https://sb-ai-lab.github.io/RePlay/"
@@ -66,13 +66,6 @@ target-version = ["py39", "py310", "py311", "py312"]
  packages = [{include = "replay"}]
  exclude = [
  "replay/conftest.py",
- "replay/experimental",
- ]
-
- [tool.poetry.dependencies]
- torch = [
- {markers = "extra == 'torch-cpu' and extra !='torch'", source = "torch-cpu-mirror"},
- {markers = "extra == 'torch' and extra !='torch-cpu'", source = "PyPI"},
  ]

  [tool.poetry.group.dev.dependencies]
@@ -95,11 +88,6 @@ docutils = "0.16"
  data-science-types = "0.2.23"
  filelock = "~3.14.0"

- [[tool.poetry.source]]
- name = "torch-cpu-mirror"
- url = "https://download.pytorch.org/whl/cpu"
- priority = "explicit"
-
  [tool.poetry-dynamic-versioning]
  enable = false
  format-jinja = """0.20.0{{ env['PACKAGE_SUFFIX'] }}"""
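
The pyproject hunks above drop the `spark`, `torch`, and `torch-cpu` extras, fold those packages into the mandatory dependency list, and leave the release suffix to `poetry-dynamic-versioning`. A minimal sketch of how the `format-jinja` template resolves to the published `0.20.0rc0` version string, assuming (for illustration only) that the build environment exports a `PACKAGE_SUFFIX` variable as the template implies:

```python
import os

# Assumption for illustration: CI sets PACKAGE_SUFFIX before building.
os.environ.setdefault("PACKAGE_SUFFIX", "rc0")

# Mirrors format-jinja = """0.20.0{{ env['PACKAGE_SUFFIX'] }}"""
version = "0.20.0" + os.environ["PACKAGE_SUFFIX"]
print(version)  # -> 0.20.0rc0, matching the Version field in PKG-INFO
```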
@@ -4,4 +4,4 @@
  # functionality removed in Python 3.12 is used in downstream packages (like lightfm)
  import setuptools as _

- __version__ = "0.20.0"
+ __version__ = "0.20.0.preview"
@@ -5,8 +5,9 @@
  from __future__ import annotations

  import json
+ from collections.abc import Iterable, Sequence
  from pathlib import Path
- from typing import Callable, Dict, Iterable, List, Optional, Sequence, Union
+ from typing import Callable, Optional, Union

  import numpy as np
  from pandas import read_parquet as pd_read_parquet
@@ -315,7 +316,7 @@ class Dataset:
  :returns: Loaded Dataset.
  """
  base_path = Path(path).with_suffix(".replay").resolve()
- with open(base_path / "init_args.json", "r") as file:
+ with open(base_path / "init_args.json") as file:
  dataset_dict = json.loads(file.read())

  if dataframe_type not in ["pandas", "spark", "polars", None]:
@@ -436,14 +437,14 @@ class Dataset:
  )

  def _get_feature_source_map(self):
- self._feature_source_map: Dict[FeatureSource, DataFrameLike] = {
+ self._feature_source_map: dict[FeatureSource, DataFrameLike] = {
  FeatureSource.INTERACTIONS: self.interactions,
  FeatureSource.QUERY_FEATURES: self.query_features,
  FeatureSource.ITEM_FEATURES: self.item_features,
  }

  def _get_ids_source_map(self):
- self._ids_feature_map: Dict[FeatureHint, DataFrameLike] = {
+ self._ids_feature_map: dict[FeatureHint, DataFrameLike] = {
  FeatureHint.QUERY_ID: self.query_features if self.query_features is not None else self.interactions,
  FeatureHint.ITEM_ID: self.item_features if self.item_features is not None else self.interactions,
  }
@@ -499,10 +500,10 @@ class Dataset:
  )
  return FeatureSchema(features_list=features_list + filled_features)

- def _fill_unlabeled_features_sources(self, feature_schema: FeatureSchema) -> List[FeatureInfo]:
+ def _fill_unlabeled_features_sources(self, feature_schema: FeatureSchema) -> list[FeatureInfo]:
  features_list = list(feature_schema.all_features)

- source_mapping: Dict[str, FeatureSource] = {}
+ source_mapping: dict[str, FeatureSource] = {}
  for source in FeatureSource:
  dataframe = self._feature_source_map[source]
  if dataframe is not None:
@@ -524,7 +525,7 @@ class Dataset:
  self._set_cardinality(features_list=features_list)
  return features_list

- def _get_unlabeled_columns(self, source: FeatureSource, feature_schema: FeatureSchema) -> List[FeatureInfo]:
+ def _get_unlabeled_columns(self, source: FeatureSource, feature_schema: FeatureSchema) -> list[FeatureInfo]:
  set_source_dataframe_columns = set(self._feature_source_map[source].columns)
  set_labeled_dataframe_columns = set(feature_schema.columns)
  unlabeled_columns = set_source_dataframe_columns - set_labeled_dataframe_columns
@@ -534,13 +535,13 @@ class Dataset:
  ]
  return unlabeled_features_list

- def _fill_unlabeled_features(self, source: FeatureSource, feature_schema: FeatureSchema) -> List[FeatureInfo]:
+ def _fill_unlabeled_features(self, source: FeatureSource, feature_schema: FeatureSchema) -> list[FeatureInfo]:
  unlabeled_columns = self._get_unlabeled_columns(source=source, feature_schema=feature_schema)
  self._set_features_source(feature_list=unlabeled_columns, source=source)
  self._set_cardinality(features_list=unlabeled_columns)
  return unlabeled_columns

- def _set_features_source(self, feature_list: List[FeatureInfo], source: FeatureSource) -> None:
+ def _set_features_source(self, feature_list: list[FeatureInfo], source: FeatureSource) -> None:
  for feature in feature_list:
  feature._set_feature_source(source)

@@ -6,7 +6,8 @@ Contains classes for encoding categorical data
  """

  import warnings
- from typing import Dict, Iterable, Iterator, Optional, Sequence, Set, Union
+ from collections.abc import Iterable, Iterator, Sequence
+ from typing import Optional, Union

  from replay.data import Dataset, FeatureHint, FeatureSchema, FeatureSource, FeatureType
  from replay.preprocessing import LabelEncoder, LabelEncodingRule, SequenceEncodingRule
@@ -45,9 +46,9 @@ class DatasetLabelEncoder:
  """
  self._handle_unknown_rule = handle_unknown_rule
  self._default_value_rule = default_value_rule
- self._encoding_rules: Dict[str, LabelEncodingRule] = {}
+ self._encoding_rules: dict[str, LabelEncodingRule] = {}

- self._features_columns: Dict[Union[FeatureHint, FeatureSource], Sequence[str]] = {}
+ self._features_columns: dict[Union[FeatureHint, FeatureSource], Sequence[str]] = {}

  def fit(self, dataset: Dataset) -> "DatasetLabelEncoder":
  """
@@ -161,7 +162,7 @@ class DatasetLabelEncoder:
  """
  self._check_if_initialized()

- columns_set: Set[str]
+ columns_set: set[str]
  columns_set = {columns} if isinstance(columns, str) else {*columns}

  def get_encoding_rules() -> Iterator[LabelEncodingRule]:
@@ -1,17 +1,8 @@
+ from collections import OrderedDict
+ from collections.abc import ItemsView, Iterable, Iterator, KeysView, Mapping, Sequence, ValuesView
  from typing import (
- Dict,
- ItemsView,
- Iterable,
- Iterator,
- KeysView,
- List,
- Mapping,
  Optional,
- OrderedDict,
- Sequence,
- Set,
  Union,
- ValuesView,
  )

  import torch
@@ -20,7 +11,7 @@ from replay.data import FeatureHint, FeatureSource, FeatureType

  # Alias
  TensorMap = Mapping[str, torch.Tensor]
- MutableTensorMap = Dict[str, torch.Tensor]
+ MutableTensorMap = dict[str, torch.Tensor]


  class TensorFeatureSource:
@@ -79,7 +70,7 @@ class TensorFeatureInfo:
  feature_type: FeatureType,
  is_seq: bool = False,
  feature_hint: Optional[FeatureHint] = None,
- feature_sources: Optional[List[TensorFeatureSource]] = None,
+ feature_sources: Optional[list[TensorFeatureSource]] = None,
  cardinality: Optional[int] = None,
  padding_value: int = 0,
  embedding_dim: Optional[int] = None,
@@ -154,13 +145,13 @@ class TensorFeatureInfo:
  self._feature_hint = hint

  @property
- def feature_sources(self) -> Optional[List[TensorFeatureSource]]:
+ def feature_sources(self) -> Optional[list[TensorFeatureSource]]:
  """
  :returns: List of sources feature came from.
  """
  return self._feature_sources

- def _set_feature_sources(self, sources: List[TensorFeatureSource]) -> None:
+ def _set_feature_sources(self, sources: list[TensorFeatureSource]) -> None:
  self._feature_sources = sources

  @property
@@ -276,7 +267,7 @@ class TensorSchema(Mapping[str, TensorFeatureInfo]):

  :returns: New tensor schema of given features.
  """
- features: Set[TensorFeatureInfo] = set()
+ features: set[TensorFeatureInfo] = set()
  for feature_name in features_to_keep:
  features.add(self._tensor_schema[feature_name])
  return TensorSchema(list(features))
@@ -432,7 +423,7 @@ class TensorSchema(Mapping[str, TensorFeatureInfo]):
  return None
  return rating_features.item().name

- def _get_object_args(self) -> Dict:
+ def _get_object_args(self) -> dict:
  """
  Returns list of features represented as dictionaries.
  """
@@ -456,7 +447,7 @@ class TensorSchema(Mapping[str, TensorFeatureInfo]):
  return features

  @classmethod
- def _create_object_by_args(cls, args: Dict) -> "TensorSchema":
+ def _create_object_by_args(cls, args: dict) -> "TensorSchema":
  features_list = []
  for feature_data in args:
  feature_data["feature_sources"] = (
@@ -2,8 +2,9 @@ import abc
  import json
  import pickle
  import warnings
+ from collections.abc import Sequence
  from pathlib import Path
- from typing import TYPE_CHECKING, Dict, Generic, List, Optional, Sequence, Set, Tuple, TypeVar, Union
+ from typing import TYPE_CHECKING, Generic, Optional, TypeVar, Union

  import numpy as np
  import polars as pl
@@ -187,7 +188,7 @@ class SequenceTokenizer:
  def _group_dataset(
  self,
  dataset: Dataset,
- ) -> Tuple[SequenceDataFrameLike, Optional[SequenceDataFrameLike], Optional[SequenceDataFrameLike]]:
+ ) -> tuple[SequenceDataFrameLike, Optional[SequenceDataFrameLike], Optional[SequenceDataFrameLike]]:
  from replay.data.nn.utils import ensure_pandas, groupby_sequences

  grouped_interactions = groupby_sequences(
@@ -268,13 +269,13 @@ class SequenceTokenizer:
  tensor_schema: "TensorSchema",
  query_id_column: str,
  item_id_column: str,
- ) -> Set[str]:
+ ) -> set[str]:
  # We need only features, which related to tensor schema, otherwise feature should
  # be ignored for efficiency reasons. The code below does feature filtering, and
  # keeps features used as a source in tensor schema.

  # Query and item IDs are always needed
- features_subset: List[str] = [
+ features_subset: list[str] = [
  query_id_column,
  item_id_column,
  ]
@@ -303,7 +304,7 @@ class SequenceTokenizer:
  msg = "All tensor features must have sources defined"
  raise ValueError(msg)

- source_tables: List[FeatureSource] = [s.source for s in feature_sources]
+ source_tables: list[FeatureSource] = [s.source for s in feature_sources]

  unexpected_tables = list(filter(lambda x: not isinstance(x, FeatureSource), source_tables))
  if len(unexpected_tables) > 0:
@@ -327,7 +328,7 @@ class SequenceTokenizer:
  tensor_features_to_keep: Optional[Sequence[str]] = None,
  ) -> None:
  # Check if all source columns specified in tensor schema exist in provided data frames
- sources_for_tensors: List["TensorFeatureSource"] = []
+ sources_for_tensors: list["TensorFeatureSource"] = []
  for tensor_feature_name, tensor_feature in tensor_schema.items():
  if tensor_features_to_keep is not None and tensor_feature_name not in tensor_features_to_keep:
  continue
@@ -421,7 +422,7 @@ class SequenceTokenizer:

  if not use_pickle:
  base_path = Path(path).with_suffix(".replay").resolve()
- with open(base_path / "init_args.json", "r") as file:
+ with open(base_path / "init_args.json") as file:
  tokenizer_dict = json.loads(file.read())

  # load tensor_schema, tensor_features
@@ -625,7 +626,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
  """
  :returns: processed Pandas DataFrame with all features from tensor schema.
  """
- all_features: Dict[str, Union[np.ndarray, List[np.ndarray]]] = {}
+ all_features: dict[str, Union[np.ndarray, list[np.ndarray]]] = {}
  all_features[self._query_id_column] = self._grouped_interactions[self._query_id_column].values

  for tensor_feature_name in self._tensor_schema:
@@ -635,7 +636,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):

  def _process_num_interaction_feature(
  self, tensor_feature: "TensorFeatureInfo"
- ) -> Union[List[np.ndarray], List[List]]:
+ ) -> Union[list[np.ndarray], list[list]]:
  """
  Process numerical interaction feature.

@@ -656,7 +657,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
  values.append(np.array(sequence))
  return values

- def _process_num_item_feature(self, tensor_feature: "TensorFeatureInfo") -> Union[List[np.ndarray], List[List]]:
+ def _process_num_item_feature(self, tensor_feature: "TensorFeatureInfo") -> Union[list[np.ndarray], list[list]]:
  """
  Process numerical feature from item features dataset.

@@ -682,7 +683,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):

  return values

- def _process_num_query_feature(self, tensor_feature: "TensorFeatureInfo") -> List[np.ndarray]:
+ def _process_num_query_feature(self, tensor_feature: "TensorFeatureInfo") -> list[np.ndarray]:
  """
  Process numerical feature from query features dataset.

@@ -694,7 +695,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):

  def _process_cat_interaction_feature(
  self, tensor_feature: "TensorFeatureInfo"
- ) -> Union[List[np.ndarray], List[List]]:
+ ) -> Union[list[np.ndarray], list[list]]:
  """
  Process categorical interaction feature.

@@ -715,7 +716,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
  values.append(np.array(sequence))
  return values

- def _process_cat_query_feature(self, tensor_feature: "TensorFeatureInfo") -> List[np.ndarray]:
+ def _process_cat_query_feature(self, tensor_feature: "TensorFeatureInfo") -> list[np.ndarray]:
  """
  Process categorical feature from query features dataset.

@@ -744,7 +745,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
  ]
  return [np.array([query_feature[i]]).reshape(-1) for i in range(len(self._grouped_interactions))]

- def _process_cat_item_feature(self, tensor_feature: "TensorFeatureInfo") -> Union[List[np.ndarray], List[List]]:
+ def _process_cat_item_feature(self, tensor_feature: "TensorFeatureInfo") -> Union[list[np.ndarray], list[list]]:
  """
  Process categorical feature from item features dataset.

@@ -760,7 +761,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
  assert source is not None

  item_feature = self._item_features[source.column]
- values: List[np.ndarray] = []
+ values: list[np.ndarray] = []

  for item_id_sequence in self._grouped_interactions[self._item_id_column]:
  feature_sequence = item_feature.loc[item_id_sequence].values
@@ -1,7 +1,7 @@
  import abc
  import json
  from pathlib import Path
- from typing import TYPE_CHECKING, Tuple, Union
+ from typing import TYPE_CHECKING, Union

  import numpy as np
  import pandas as pd
@@ -90,7 +90,7 @@ class SequentialDataset(abc.ABC):
  @staticmethod
  def keep_common_query_ids(
  lhs: "SequentialDataset", rhs: "SequentialDataset"
- ) -> Tuple["SequentialDataset", "SequentialDataset"]:
+ ) -> tuple["SequentialDataset", "SequentialDataset"]:
  """
  Returns `SequentialDatasets` that contain query ids from both datasets.

@@ -203,7 +203,7 @@ class PandasSequentialDataset(SequentialDataset):
  from replay.data.nn import TensorSchema

  base_path = Path(path).with_suffix(".replay").resolve()
- with open(base_path / "init_args.json", "r") as file:
+ with open(base_path / "init_args.json") as file:
  sequential_dict = json.loads(file.read())

  sequences = pd.read_json(base_path / sequential_dict["init_args"]["sequences_path"])
@@ -287,7 +287,7 @@ class PolarsSequentialDataset(PandasSequentialDataset):
  from replay.data.nn import TensorSchema

  base_path = Path(path).with_suffix(".replay").resolve()
- with open(base_path / "init_args.json", "r") as file:
+ with open(base_path / "init_args.json") as file:
  sequential_dict = json.loads(file.read())

  sequences = pl.DataFrame(pd.read_json(base_path / sequential_dict["init_args"]["sequences_path"]))
@@ -1,4 +1,5 @@
- from typing import TYPE_CHECKING, Generator, NamedTuple, Optional, Sequence, Tuple, Union, cast
+ from collections.abc import Generator, Sequence
+ from typing import TYPE_CHECKING, NamedTuple, Optional, Union, cast

  import numpy as np
  import torch
@@ -110,7 +111,7 @@ class TorchSequentialDataset(TorchDataset):
  return sequence

  # form shape for padded_sequence. Now supported one and two-dimentions features
- padded_sequence_shape: Union[Tuple[int, int], Tuple[int]]
+ padded_sequence_shape: Union[tuple[int, int], tuple[int]]
  if len(sequence.shape) == 1:
  padded_sequence_shape = (self._max_sequence_length,)
  elif len(sequence.shape) == 2:
@@ -134,10 +135,10 @@ class TorchSequentialDataset(TorchDataset):
  return torch.float32
  assert False, "Unknown tensor feature type"

- def _build_index2sequence_map(self) -> Sequence[Tuple[int, int]]:
+ def _build_index2sequence_map(self) -> Sequence[tuple[int, int]]:
  return list(self._iter_with_window())

- def _iter_with_window(self) -> Generator[Tuple[int, int], None, None]:
+ def _iter_with_window(self) -> Generator[tuple[int, int], None, None]:
  for i in range(len(self._sequential)):
  actual_seq_len = self._sequential.get_sequence_length(i)
  left_seq_len = actual_seq_len - self._max_sequence_length
@@ -1,4 +1,5 @@
- from typing import Iterable, Optional
+ from collections.abc import Iterable
+ from typing import Optional

  import polars as pl

@@ -1,18 +1,9 @@
+ from collections.abc import ItemsView, Iterable, Iterator, KeysView, Mapping, Sequence, ValuesView
  from enum import Enum
  from typing import (
  Callable,
- Dict,
- ItemsView,
- Iterable,
- Iterator,
- KeysView,
- List,
- Mapping,
  Optional,
- Sequence,
- Set,
  Union,
- ValuesView,
  )


@@ -162,7 +153,7 @@ class FeatureSchema(Mapping[str, FeatureInfo]):
  in original schema to keep in subset.
  :returns: new feature schema of given features.
  """
- features: Set[FeatureInfo] = set()
+ features: set[FeatureInfo] = set()
  for feature_column in features_to_keep:
  if feature_column in self._features_schema:
  features.add(self._features_schema[feature_column])
@@ -438,7 +429,7 @@ class FeatureSchema(Mapping[str, FeatureInfo]):
  """
  unique_columns = set()
  duplicates = set()
- item_query_names: Dict[FeatureHint, List[str]] = {
+ item_query_names: dict[FeatureHint, list[str]] = {
  FeatureHint.ITEM_ID: [],
  FeatureHint.QUERY_ID: [],
  }
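
The bulk of the source hunks above apply one mechanical change: `typing.Dict`, `List`, `Set`, and `Tuple` become the builtin generics standardized by PEP 585 (available on Python 3.9+, matching the `target-version` list in pyproject.toml), and abstract container types move to `collections.abc`. A minimal sketch of the resulting style, with hypothetical function names for illustration:

```python
# ABCs now come from collections.abc rather than typing,
# and builtins are subscripted directly.
from collections.abc import Iterable, Sequence
from typing import Optional

def top_k(items: Sequence[str], k: int) -> list[str]:
    # list[str] replaces typing.List[str]
    return list(items[:k])

def count(items: Iterable[str]) -> dict[str, int]:
    # dict[str, int] replaces typing.Dict[str, int]
    counts: dict[str, int] = {}
    for item in items:
        counts[item] = counts.get(item, 0) + 1
    return counts

def first_pair(items: Sequence[int]) -> Optional[tuple[int, int]]:
    # tuple[int, int] replaces typing.Tuple[int, int]
    return (0, items[0]) if items else None
```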
@@ -0,0 +1,62 @@
+ """
+ Most metrics require a dataframe with recommendations
+ and a dataframe with ground truth values —
+ the items each user interacted with.
+
+ - recommendations (Union[pandas.DataFrame, spark.DataFrame]):
+ predictions of a recommender system,
+ DataFrame with columns ``[user_id, item_id, relevance]``
+ - ground_truth (Union[pandas.DataFrame, spark.DataFrame]):
+ test data, DataFrame with columns
+ ``[user_id, item_id, timestamp, relevance]``
+
+ Metrics are calculated for all users present in ``ground_truth``,
+ so that they remain accurate even when the recommender system did not
+ generate recommendations for every user. It is assumed that every user
+ we want to calculate a metric for has positive interactions.
+
+ But if there are users who saw the recommendations and did not respond,
+ those users will be ignored and the metric will be overestimated.
+ For this case we provide the additional optional parameter ``ground_truth_users``,
+ a dataframe with all users that should be considered during metric calculation.
+
+ - ground_truth_users (Optional[Union[pandas.DataFrame, spark.DataFrame]]):
+ full list of users to calculate the metric for, DataFrame with a ``user_id`` column
+
+ Every metric is calculated using the top ``K`` items for each user.
+ It is also possible to calculate metrics
+ using multiple values of ``K`` simultaneously.
+ In this case the result will be a dictionary rather than a number.
+
+ Make sure your recommendations do not contain user-item duplicates,
+ as duplicates can lead to incorrect calculation results.
+
+ - k (Union[Iterable[int], int]):
+ a single number or a list specifying the
+ truncation length of the recommendation list for each user
+
+ By default, metrics are averaged over users,
+ but you can alternatively use the method ``metric.median``.
+ You can also get the lower bound
+ of ``conf_interval`` for a given ``alpha``.
+
+ Diversity metrics require extra parameters at the initialization stage
+ but do not use the ``ground_truth`` parameter.
+
+ For each metric, a formula for its calculation is given, because this is
+ important for the correct comparison of algorithms, as mentioned in our
+ `article <https://arxiv.org/abs/2206.12858>`_.
+ """
+
+ from replay.experimental.metrics.base_metric import Metric, NCISMetric
+ from replay.experimental.metrics.coverage import Coverage
+ from replay.experimental.metrics.hitrate import HitRate
+ from replay.experimental.metrics.map import MAP
+ from replay.experimental.metrics.mrr import MRR
+ from replay.experimental.metrics.ncis_precision import NCISPrecision
+ from replay.experimental.metrics.ndcg import NDCG
+ from replay.experimental.metrics.precision import Precision
+ from replay.experimental.metrics.recall import Recall
+ from replay.experimental.metrics.rocauc import RocAuc
+ from replay.experimental.metrics.surprisal import Surprisal
+ from replay.experimental.metrics.unexpectedness import Unexpectedness
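
The docstring above describes the expected inputs but not the call signature, which lives in `base_metric.py` (summarized in this diff only as +603 lines). A hypothetical usage sketch consistent with the docstring — the `HitRate` call and its argument order are assumptions, not confirmed by this diff:

```python
import pandas as pd

from replay.experimental.metrics import HitRate

# Recommendations: [user_id, item_id, relevance], per the docstring.
recommendations = pd.DataFrame(
    {
        "user_id": [1, 1, 2, 2],
        "item_id": [10, 20, 10, 30],
        "relevance": [0.9, 0.5, 0.8, 0.4],
    }
)

# Ground truth: [user_id, item_id, timestamp, relevance].
ground_truth = pd.DataFrame(
    {
        "user_id": [1, 2],
        "item_id": [20, 30],
        "timestamp": [0, 0],
        "relevance": [1.0, 1.0],
    }
)

# Per the docstring, passing several values of K should return a dict
# keyed by cutoff instead of a single number.
metric = HitRate()
print(metric(recommendations, ground_truth, [1, 2]))
```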