replay-rec 0.18.0__tar.gz → 0.18.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. {replay_rec-0.18.0 → replay_rec-0.18.1}/PKG-INFO +73 -60
  2. {replay_rec-0.18.0 → replay_rec-0.18.1}/README.md +66 -56
  3. {replay_rec-0.18.0 → replay_rec-0.18.1}/pyproject.toml +8 -4
  4. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/__init__.py +1 -1
  5. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/data/dataset.py +27 -1
  6. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/data/dataset_utils/dataset_label_encoder.py +6 -3
  7. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/data/nn/schema.py +37 -16
  8. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/data/nn/sequence_tokenizer.py +313 -165
  9. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/data/nn/torch_sequential_dataset.py +17 -8
  10. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/data/nn/utils.py +14 -7
  11. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/data/schema.py +10 -6
  12. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/offline_metrics.py +2 -2
  13. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/__init__.py +1 -0
  14. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/base_rec.py +18 -21
  15. replay_rec-0.18.1/replay/models/lin_ucb.py +407 -0
  16. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/bert4rec/dataset.py +17 -4
  17. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/bert4rec/lightning.py +121 -54
  18. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/bert4rec/model.py +21 -0
  19. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/callbacks/prediction_callbacks.py +5 -1
  20. replay_rec-0.18.1/replay/models/nn/sequential/compiled/__init__.py +5 -0
  21. replay_rec-0.18.1/replay/models/nn/sequential/compiled/base_compiled_model.py +261 -0
  22. replay_rec-0.18.1/replay/models/nn/sequential/compiled/bert4rec_compiled.py +152 -0
  23. replay_rec-0.18.1/replay/models/nn/sequential/compiled/sasrec_compiled.py +145 -0
  24. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/postprocessors/postprocessors.py +27 -1
  25. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/sasrec/dataset.py +17 -1
  26. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/sasrec/lightning.py +126 -50
  27. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/sasrec/model.py +3 -4
  28. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/preprocessing/__init__.py +7 -1
  29. replay_rec-0.18.1/replay/preprocessing/discretizer.py +719 -0
  30. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/preprocessing/label_encoder.py +384 -52
  31. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/splitters/cold_user_random_splitter.py +1 -1
  32. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/utils/__init__.py +1 -0
  33. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/utils/common.py +7 -8
  34. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/utils/session_handler.py +3 -4
  35. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/utils/spark_utils.py +15 -1
  36. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/utils/types.py +8 -0
  37. {replay_rec-0.18.0 → replay_rec-0.18.1}/LICENSE +0 -0
  38. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/data/__init__.py +0 -0
  39. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/data/dataset_utils/__init__.py +0 -0
  40. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/data/nn/__init__.py +0 -0
  41. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/data/nn/sequential_dataset.py +0 -0
  42. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/data/spark_schema.py +0 -0
  43. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/__init__.py +0 -0
  44. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/base_metric.py +0 -0
  45. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/categorical_diversity.py +0 -0
  46. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/coverage.py +0 -0
  47. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/descriptors.py +0 -0
  48. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/experiment.py +0 -0
  49. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/hitrate.py +0 -0
  50. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/map.py +0 -0
  51. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/mrr.py +0 -0
  52. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/ndcg.py +0 -0
  53. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/novelty.py +0 -0
  54. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/precision.py +0 -0
  55. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/recall.py +0 -0
  56. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/rocauc.py +0 -0
  57. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/surprisal.py +0 -0
  58. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/torch_metrics_builder.py +0 -0
  59. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/metrics/unexpectedness.py +0 -0
  60. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/als.py +0 -0
  61. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/association_rules.py +0 -0
  62. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/base_neighbour_rec.py +0 -0
  63. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/cat_pop_rec.py +0 -0
  64. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/cluster.py +0 -0
  65. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/__init__.py +0 -0
  66. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/__init__.py +0 -0
  67. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/ann_mixin.py +0 -0
  68. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/entities/__init__.py +0 -0
  69. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/entities/base_hnsw_param.py +0 -0
  70. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/entities/hnswlib_param.py +0 -0
  71. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/entities/nmslib_hnsw_param.py +0 -0
  72. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/__init__.py +0 -0
  73. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/base_index_builder.py +0 -0
  74. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/driver_hnswlib_index_builder.py +0 -0
  75. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/driver_nmslib_index_builder.py +0 -0
  76. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/executor_hnswlib_index_builder.py +0 -0
  77. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/executor_nmslib_index_builder.py +0 -0
  78. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_builders/nmslib_index_builder_mixin.py +0 -0
  79. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/__init__.py +0 -0
  80. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/base_inferer.py +0 -0
  81. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/hnswlib_filter_index_inferer.py +0 -0
  82. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/hnswlib_index_inferer.py +0 -0
  83. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py +0 -0
  84. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py +0 -0
  85. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_inferers/utils.py +0 -0
  86. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_stores/__init__.py +0 -0
  87. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_stores/base_index_store.py +0 -0
  88. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_stores/hdfs_index_store.py +0 -0
  89. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_stores/shared_disk_index_store.py +0 -0
  90. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_stores/spark_files_index_store.py +0 -0
  91. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/index_stores/utils.py +0 -0
  92. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/extensions/ann/utils.py +0 -0
  93. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/kl_ucb.py +0 -0
  94. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/knn.py +0 -0
  95. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/__init__.py +0 -0
  96. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/optimizer_utils/__init__.py +0 -0
  97. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/optimizer_utils/optimizer_factory.py +0 -0
  98. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/__init__.py +0 -0
  99. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/bert4rec/__init__.py +0 -0
  100. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/callbacks/__init__.py +0 -0
  101. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/callbacks/validation_callback.py +0 -0
  102. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/postprocessors/__init__.py +0 -0
  103. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/postprocessors/_base.py +0 -0
  104. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/nn/sequential/sasrec/__init__.py +0 -0
  105. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/pop_rec.py +0 -0
  106. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/query_pop_rec.py +0 -0
  107. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/random_rec.py +0 -0
  108. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/slim.py +0 -0
  109. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/thompson_sampling.py +0 -0
  110. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/ucb.py +0 -0
  111. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/wilson.py +0 -0
  112. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/models/word2vec.py +0 -0
  113. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/optimization/__init__.py +0 -0
  114. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/optimization/optuna_objective.py +0 -0
  115. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/preprocessing/converter.py +0 -0
  116. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/preprocessing/filters.py +0 -0
  117. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/preprocessing/history_based_fp.py +0 -0
  118. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/preprocessing/sessionizer.py +0 -0
  119. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/scenarios/__init__.py +0 -0
  120. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/scenarios/fallback.py +0 -0
  121. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/splitters/__init__.py +0 -0
  122. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/splitters/base_splitter.py +0 -0
  123. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/splitters/k_folds.py +0 -0
  124. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/splitters/last_n_splitter.py +0 -0
  125. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/splitters/new_users_splitter.py +0 -0
  126. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/splitters/random_splitter.py +0 -0
  127. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/splitters/ratio_splitter.py +0 -0
  128. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/splitters/time_splitter.py +0 -0
  129. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/splitters/two_stage_splitter.py +0 -0
  130. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/utils/dataframe_bucketizer.py +0 -0
  131. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/utils/distributions.py +0 -0
  132. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/utils/model_handler.py +0 -0
  133. {replay_rec-0.18.0 → replay_rec-0.18.1}/replay/utils/time.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: replay-rec
3
- Version: 0.18.0
3
+ Version: 0.18.1
4
4
  Summary: RecSys Library
5
5
  Home-page: https://sb-ai-lab.github.io/RePlay/
6
6
  License: Apache-2.0
@@ -21,10 +21,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
21
  Provides-Extra: all
22
22
  Provides-Extra: spark
23
23
  Provides-Extra: torch
24
+ Provides-Extra: torch-openvino
24
25
  Requires-Dist: fixed-install-nmslib (==2.1.2)
25
26
  Requires-Dist: hnswlib (>=0.7.0,<0.8.0)
26
- Requires-Dist: lightning (>=2.0.2,<=2.4.0) ; extra == "torch" or extra == "all"
27
+ Requires-Dist: lightning (>=2.0.2,<=2.4.0) ; extra == "torch" or extra == "torch-openvino" or extra == "all"
27
28
  Requires-Dist: numpy (>=1.20.0)
29
+ Requires-Dist: onnx (>=1.16.2,<1.17.0) ; extra == "torch-openvino" or extra == "all"
30
+ Requires-Dist: openvino (>=2024.3.0,<2024.4.0) ; extra == "torch-openvino" or extra == "all"
28
31
  Requires-Dist: optuna (>=3.2.0,<3.3.0)
29
32
  Requires-Dist: pandas (>=1.3.5,<=2.2.2)
30
33
  Requires-Dist: polars (>=1.0.0,<1.1.0)
@@ -32,10 +35,10 @@ Requires-Dist: psutil (>=6.0.0,<6.1.0)
32
35
  Requires-Dist: pyarrow (>=12.0.1)
33
36
  Requires-Dist: pyspark (>=3.0,<3.6) ; (python_full_version >= "3.8.1" and python_version < "3.11") and (extra == "spark" or extra == "all")
34
37
  Requires-Dist: pyspark (>=3.4,<3.6) ; (python_version >= "3.11" and python_version < "3.12") and (extra == "spark" or extra == "all")
35
- Requires-Dist: pytorch-ranger (>=0.1.1,<0.2.0) ; extra == "torch" or extra == "all"
38
+ Requires-Dist: pytorch-ranger (>=0.1.1,<0.2.0) ; extra == "torch" or extra == "torch-openvino" or extra == "all"
36
39
  Requires-Dist: scikit-learn (>=1.0.2,<2.0.0)
37
40
  Requires-Dist: scipy (>=1.8.1,<2.0.0)
38
- Requires-Dist: torch (>=1.8,<=2.4.0) ; extra == "torch" or extra == "all"
41
+ Requires-Dist: torch (>=1.8,<=2.5.0) ; extra == "torch" or extra == "torch-openvino" or extra == "all"
39
42
  Project-URL: Repository, https://github.com/sb-ai-lab/RePlay
40
43
  Description-Content-Type: text/markdown
41
44
 
@@ -44,11 +47,15 @@ Description-Content-Type: text/markdown
44
47
 
45
48
  [![GitHub License](https://img.shields.io/github/license/sb-ai-lab/RePlay)](https://github.com/sb-ai-lab/RePlay/blob/main/LICENSE)
46
49
  [![PyPI - Version](https://img.shields.io/pypi/v/replay-rec)](https://pypi.org/project/replay-rec)
50
+ [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://sb-ai-lab.github.io/RePlay/)
47
51
  [![PyPI - Downloads](https://img.shields.io/pypi/dm/replay-rec)](https://pypistats.org/packages/replay-rec)
48
52
  <br>
49
53
  [![GitHub Workflow Status (with event)](https://img.shields.io/github/actions/workflow/status/sb-ai-lab/replay/main.yml)](https://github.com/sb-ai-lab/RePlay/actions/workflows/main.yml?query=branch%3Amain)
54
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
55
+ [![Python Versions](https://img.shields.io/pypi/pyversions/replay-rec.svg?logo=python&logoColor=white)](https://pypi.org/project/replay-rec)
50
56
  [![Join the community on GitHub Discussions](https://badgen.net/badge/join%20the%20discussion/on%20github/black?icon=github)](https://github.com/sb-ai-lab/RePlay/discussions)
51
57
 
58
+
52
59
  RePlay is an advanced framework designed to facilitate the development and evaluation of recommendation systems. It provides a robust set of tools covering the entire lifecycle of a recommendation system pipeline:
53
60
 
54
61
  ## 🚀 Features:
@@ -63,61 +70,25 @@ RePlay is an advanced framework designed to facilitate the development and evalu
63
70
  1. **Diverse Hardware Support:** Compatible with various hardware configurations including CPU, GPU, Multi-GPU.
64
71
  2. **Cluster Computing Integration:** Integrating with PySpark for distributed computing, enabling scalability for large-scale recommendation systems.
65
72
 
66
- ## 📖 Documentation is available [here](https://sb-ai-lab.github.io/RePlay/).
67
-
68
73
  <a name="toc"></a>
69
74
  # Table of Contents
70
75
 
71
- * [Installation](#installation)
72
76
  * [Quickstart](#quickstart)
77
+ * [Installation](#installation)
73
78
  * [Resources](#examples)
74
79
  * [Contributing to RePlay](#contributing)
75
80
 
76
81
 
77
- <a name="installation"></a>
78
- ## 🔧 Installation
79
-
80
- Installation via `pip` package manager is recommended by default:
81
-
82
- ```bash
83
- pip install replay-rec
84
- ```
85
-
86
- In this case it will be installed the `core` package without `PySpark` and `PyTorch` dependencies.
87
- Also `experimental` submodule will not be installed.
88
-
89
- To install `experimental` submodule please specify the version with `rc0` suffix.
90
- For example:
91
-
92
- ```bash
93
- pip install replay-rec==XX.YY.ZZrc0
94
- ```
95
-
96
- ### Extras
97
-
98
- In addition to the core package, several extras are also provided, including:
99
- - `[spark]`: Install PySpark functionality
100
- - `[torch]`: Install PyTorch and Lightning functionality
101
- - `[all]`: `[spark]` `[torch]`
82
+ <a name="quickstart"></a>
83
+ ## 📈 Quickstart
102
84
 
103
- Example:
104
85
  ```bash
105
- # Install core package with PySpark dependency
106
- pip install replay-rec[spark]
107
-
108
- # Install package with experimental submodule and PySpark dependency
109
- pip install replay-rec[spark]==XX.YY.ZZrc0
86
+ pip install replay-rec[all]
110
87
  ```
111
88
 
112
- To build RePlay from sources please use the [instruction](CONTRIBUTING.md#installing-from-the-source).
113
-
114
- If you encounter an error during RePlay installation, check the [troubleshooting](https://sb-ai-lab.github.io/RePlay/pages/installation.html#troubleshooting) guide.
115
-
116
-
117
- <a name="quickstart"></a>
118
- ## 📈 Quickstart (PySpark-based)
119
-
89
+ PySpark-based model and [fast](https://github.com/sb-ai-lab/RePlay/blob/main/examples/11_sasrec_dataframes_comparison.ipynb) polars-based data preprocessing:
120
90
  ```python
91
+ from polars import from_pandas
121
92
  from rs_datasets import MovieLens
122
93
 
123
94
  from replay.data import Dataset, FeatureHint, FeatureInfo, FeatureSchema, FeatureType
@@ -131,10 +102,10 @@ from replay.splitters import RatioSplitter
131
102
  spark = State().session
132
103
 
133
104
  ml_1m = MovieLens("1m")
134
- K=10
105
+ K = 10
135
106
 
136
- # data preprocessing
137
- interactions = convert2spark(ml_1m.ratings)
107
+ # convert data to polars
108
+ interactions = from_pandas(ml_1m.ratings)
138
109
 
139
110
  # data splitting
140
111
  splitter = RatioSplitter(
@@ -148,7 +119,7 @@ splitter = RatioSplitter(
148
119
  )
149
120
  train, test = splitter.split(interactions)
150
121
 
151
- # dataset creating
122
+ # datasets creation
152
123
  feature_schema = FeatureSchema(
153
124
  [
154
125
  FeatureInfo(
@@ -174,20 +145,18 @@ feature_schema = FeatureSchema(
174
145
  ]
175
146
  )
176
147
 
177
- train_dataset = Dataset(
178
- feature_schema=feature_schema,
179
- interactions=train,
180
- )
181
- test_dataset = Dataset(
182
- feature_schema=feature_schema,
183
- interactions=test,
184
- )
148
+ train_dataset = Dataset(feature_schema=feature_schema, interactions=train)
149
+ test_dataset = Dataset(feature_schema=feature_schema, interactions=test)
185
150
 
186
151
  # data encoding
187
152
  encoder = DatasetLabelEncoder()
188
153
  train_dataset = encoder.fit_transform(train_dataset)
189
154
  test_dataset = encoder.transform(test_dataset)
190
155
 
156
+ # convert datasets to spark
157
+ train_dataset.to_spark()
158
+ test_dataset.to_spark()
159
+
191
160
  # model training
192
161
  model = ItemKNN()
193
162
  model.fit(train_dataset)
@@ -214,6 +183,44 @@ metrics.add_result("ItemKNN", recs)
214
183
  print(metrics.results)
215
184
  ```
216
185
 
186
+ <a name="installation"></a>
187
+ ## 🔧 Installation
188
+
189
+ Installation via `pip` package manager is recommended by default:
190
+
191
+ ```bash
192
+ pip install replay-rec
193
+ ```
194
+
195
+ In this case, the `core` package will be installed without `PySpark` and `PyTorch` dependencies.
196
+ Also, the `experimental` submodule will not be installed.
197
+
198
+ To install the `experimental` submodule, please specify the version with the `rc0` suffix.
199
+ For example:
200
+
201
+ ```bash
202
+ pip install replay-rec==XX.YY.ZZrc0
203
+ ```
204
+
205
+ ### Extras
206
+
207
+ In addition to the core package, several extras are also provided, including:
208
+ - `[spark]`: Install PySpark functionality
209
+ - `[torch]`: Install PyTorch and Lightning functionality
210
+ - `[all]`: `[spark]` `[torch]`
211
+
212
+ Example:
213
+ ```bash
214
+ # Install core package with PySpark dependency
215
+ pip install replay-rec[spark]
216
+
217
+ # Install package with experimental submodule and PySpark dependency
218
+ pip install replay-rec[spark]==XX.YY.ZZrc0
219
+ ```
220
+
221
+ To build RePlay from sources please use the [instruction](CONTRIBUTING.md#installing-from-the-source).
222
+
223
+
217
224
  <a name="examples"></a>
218
225
  ## 📑 Resources
219
226
 
@@ -226,14 +233,19 @@ print(metrics.results)
226
233
  6. [06_item2item_recommendations.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/06_item2item_recommendations.ipynb) - Item to Item recommendations example.
227
234
  7. [07_filters.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/07_filters.ipynb) - An example of using filters.
228
235
  8. [08_recommending_for_categories.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/08_recommending_for_categories.ipynb) - An example of recommendation for product categories.
229
- 9. [09_sasrec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/09_sasrec_example.ipynb) - An example of using transformers to generate recommendations.
230
-
236
+ 9. [09_sasrec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/09_sasrec_example.ipynb) - An example of using transformer-based SASRec model to generate recommendations.
237
+ 10. [10_bert4rec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/10_bert4rec_example.ipynb) - An example of using transformer-based BERT4Rec model to generate recommendations.
238
+ 11. [11_sasrec_dataframes_comparison.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/11_sasrec_dataframes_comparison.ipynb) - A speed comparison of different frameworks (pandas, polars, pyspark) for data processing during SASRec training.
239
+ 12. [12_neural_ts_exp.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/12_neural_ts_exp.ipynb) - An example of using Neural Thompson Sampling bandit model (based on Wide&Deep architecture).
240
+ 13. [13_personalized_bandit_comparison.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/13_personalized_bandit_comparison.ipynb) - A comparison of context-free and contextual bandit models.
241
+ 14. [14_hierarchical_recommender.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/14_hierarchical_recommender.ipynb) - An example of using HierarchicalRecommender with user-disjoint LinUCB.
231
242
 
232
243
  ### Videos and papers
233
244
  * **Video guides**:
234
245
  - [Replay for offline recommendations, AI Journey 2021](https://www.youtube.com/watch?v=ejQZKGAG0xs)
235
246
 
236
247
  * **Research papers**:
248
+ - [RePlay: a Recommendation Framework for Experimentation and Production Use](https://arxiv.org/abs/2409.07272) Alexey Vasilev, Anna Volodkevich, Denis Kulandin, Tatiana Bysheva, Anton Klenitskiy. In The 18th ACM Conference on Recommender Systems (RecSys '24)
237
249
  - [Turning Dross Into Gold Loss: is BERT4Rec really better than SASRec?](https://doi.org/10.1145/3604915.3610644) Anton Klenitskiy, Alexey Vasilev. In The 17th ACM Conference on Recommender Systems (RecSys '23)
238
250
  - [The Long Tail of Context: Does it Exist and Matter?](https://arxiv.org/abs/2210.01023). Konstantin Bauman, Alexey Vasilev, Alexander Tuzhilin. In Workshop on Context-Aware Recommender Systems (CARS) (RecSys '22)
239
251
  - [Multiobjective Evaluation of Reinforcement Learning Based Recommender Systems](https://doi.org/10.1145/3523227.3551485). Alexey Grishanov, Anastasia Ianina, Konstantin Vorontsov. In The 16th ACM Conference on Recommender Systems (RecSys '22)
@@ -244,3 +256,4 @@ print(metrics.results)
244
256
 
245
257
  We welcome community contributions. For details please check our [contributing guidelines](CONTRIBUTING.md).
246
258
 
259
+
@@ -3,11 +3,15 @@
3
3
 
4
4
  [![GitHub License](https://img.shields.io/github/license/sb-ai-lab/RePlay)](https://github.com/sb-ai-lab/RePlay/blob/main/LICENSE)
5
5
  [![PyPI - Version](https://img.shields.io/pypi/v/replay-rec)](https://pypi.org/project/replay-rec)
6
+ [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://sb-ai-lab.github.io/RePlay/)
6
7
  [![PyPI - Downloads](https://img.shields.io/pypi/dm/replay-rec)](https://pypistats.org/packages/replay-rec)
7
8
  <br>
8
9
  [![GitHub Workflow Status (with event)](https://img.shields.io/github/actions/workflow/status/sb-ai-lab/replay/main.yml)](https://github.com/sb-ai-lab/RePlay/actions/workflows/main.yml?query=branch%3Amain)
10
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
11
+ [![Python Versions](https://img.shields.io/pypi/pyversions/replay-rec.svg?logo=python&logoColor=white)](https://pypi.org/project/replay-rec)
9
12
  [![Join the community on GitHub Discussions](https://badgen.net/badge/join%20the%20discussion/on%20github/black?icon=github)](https://github.com/sb-ai-lab/RePlay/discussions)
10
13
 
14
+
11
15
  RePlay is an advanced framework designed to facilitate the development and evaluation of recommendation systems. It provides a robust set of tools covering the entire lifecycle of a recommendation system pipeline:
12
16
 
13
17
  ## 🚀 Features:
@@ -22,61 +26,25 @@ RePlay is an advanced framework designed to facilitate the development and evalu
22
26
  1. **Diverse Hardware Support:** Compatible with various hardware configurations including CPU, GPU, Multi-GPU.
23
27
  2. **Cluster Computing Integration:** Integrating with PySpark for distributed computing, enabling scalability for large-scale recommendation systems.
24
28
 
25
- ## 📖 Documentation is available [here](https://sb-ai-lab.github.io/RePlay/).
26
-
27
29
  <a name="toc"></a>
28
30
  # Table of Contents
29
31
 
30
- * [Installation](#installation)
31
32
  * [Quickstart](#quickstart)
33
+ * [Installation](#installation)
32
34
  * [Resources](#examples)
33
35
  * [Contributing to RePlay](#contributing)
34
36
 
35
37
 
36
- <a name="installation"></a>
37
- ## 🔧 Installation
38
-
39
- Installation via `pip` package manager is recommended by default:
40
-
41
- ```bash
42
- pip install replay-rec
43
- ```
44
-
45
- In this case it will be installed the `core` package without `PySpark` and `PyTorch` dependencies.
46
- Also `experimental` submodule will not be installed.
47
-
48
- To install `experimental` submodule please specify the version with `rc0` suffix.
49
- For example:
50
-
51
- ```bash
52
- pip install replay-rec==XX.YY.ZZrc0
53
- ```
54
-
55
- ### Extras
56
-
57
- In addition to the core package, several extras are also provided, including:
58
- - `[spark]`: Install PySpark functionality
59
- - `[torch]`: Install PyTorch and Lightning functionality
60
- - `[all]`: `[spark]` `[torch]`
38
+ <a name="quickstart"></a>
39
+ ## 📈 Quickstart
61
40
 
62
- Example:
63
41
  ```bash
64
- # Install core package with PySpark dependency
65
- pip install replay-rec[spark]
66
-
67
- # Install package with experimental submodule and PySpark dependency
68
- pip install replay-rec[spark]==XX.YY.ZZrc0
42
+ pip install replay-rec[all]
69
43
  ```
70
44
 
71
- To build RePlay from sources please use the [instruction](CONTRIBUTING.md#installing-from-the-source).
72
-
73
- If you encounter an error during RePlay installation, check the [troubleshooting](https://sb-ai-lab.github.io/RePlay/pages/installation.html#troubleshooting) guide.
74
-
75
-
76
- <a name="quickstart"></a>
77
- ## 📈 Quickstart (PySpark-based)
78
-
45
+ PySpark-based model and [fast](https://github.com/sb-ai-lab/RePlay/blob/main/examples/11_sasrec_dataframes_comparison.ipynb) polars-based data preprocessing:
79
46
  ```python
47
+ from polars import from_pandas
80
48
  from rs_datasets import MovieLens
81
49
 
82
50
  from replay.data import Dataset, FeatureHint, FeatureInfo, FeatureSchema, FeatureType
@@ -90,10 +58,10 @@ from replay.splitters import RatioSplitter
90
58
  spark = State().session
91
59
 
92
60
  ml_1m = MovieLens("1m")
93
- K=10
61
+ K = 10
94
62
 
95
- # data preprocessing
96
- interactions = convert2spark(ml_1m.ratings)
63
+ # convert data to polars
64
+ interactions = from_pandas(ml_1m.ratings)
97
65
 
98
66
  # data splitting
99
67
  splitter = RatioSplitter(
@@ -107,7 +75,7 @@ splitter = RatioSplitter(
107
75
  )
108
76
  train, test = splitter.split(interactions)
109
77
 
110
- # dataset creating
78
+ # datasets creation
111
79
  feature_schema = FeatureSchema(
112
80
  [
113
81
  FeatureInfo(
@@ -133,20 +101,18 @@ feature_schema = FeatureSchema(
133
101
  ]
134
102
  )
135
103
 
136
- train_dataset = Dataset(
137
- feature_schema=feature_schema,
138
- interactions=train,
139
- )
140
- test_dataset = Dataset(
141
- feature_schema=feature_schema,
142
- interactions=test,
143
- )
104
+ train_dataset = Dataset(feature_schema=feature_schema, interactions=train)
105
+ test_dataset = Dataset(feature_schema=feature_schema, interactions=test)
144
106
 
145
107
  # data encoding
146
108
  encoder = DatasetLabelEncoder()
147
109
  train_dataset = encoder.fit_transform(train_dataset)
148
110
  test_dataset = encoder.transform(test_dataset)
149
111
 
112
+ # convert datasets to spark
113
+ train_dataset.to_spark()
114
+ test_dataset.to_spark()
115
+
150
116
  # model training
151
117
  model = ItemKNN()
152
118
  model.fit(train_dataset)
@@ -173,6 +139,44 @@ metrics.add_result("ItemKNN", recs)
173
139
  print(metrics.results)
174
140
  ```
175
141
 
142
+ <a name="installation"></a>
143
+ ## 🔧 Installation
144
+
145
+ Installation via `pip` package manager is recommended by default:
146
+
147
+ ```bash
148
+ pip install replay-rec
149
+ ```
150
+
151
+ In this case, the `core` package will be installed without `PySpark` and `PyTorch` dependencies.
152
+ Also, the `experimental` submodule will not be installed.
153
+
154
+ To install the `experimental` submodule, please specify the version with the `rc0` suffix.
155
+ For example:
156
+
157
+ ```bash
158
+ pip install replay-rec==XX.YY.ZZrc0
159
+ ```
160
+
161
+ ### Extras
162
+
163
+ In addition to the core package, several extras are also provided, including:
164
+ - `[spark]`: Install PySpark functionality
165
+ - `[torch]`: Install PyTorch and Lightning functionality
166
+ - `[all]`: `[spark]` `[torch]`
167
+
168
+ Example:
169
+ ```bash
170
+ # Install core package with PySpark dependency
171
+ pip install replay-rec[spark]
172
+
173
+ # Install package with experimental submodule and PySpark dependency
174
+ pip install replay-rec[spark]==XX.YY.ZZrc0
175
+ ```
176
+
177
+ To build RePlay from sources please use the [instruction](CONTRIBUTING.md#installing-from-the-source).
178
+
179
+
176
180
  <a name="examples"></a>
177
181
  ## 📑 Resources
178
182
 
@@ -185,14 +189,19 @@ print(metrics.results)
185
189
  6. [06_item2item_recommendations.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/06_item2item_recommendations.ipynb) - Item to Item recommendations example.
186
190
  7. [07_filters.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/07_filters.ipynb) - An example of using filters.
187
191
  8. [08_recommending_for_categories.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/08_recommending_for_categories.ipynb) - An example of recommendation for product categories.
188
- 9. [09_sasrec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/09_sasrec_example.ipynb) - An example of using transformers to generate recommendations.
189
-
192
+ 9. [09_sasrec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/09_sasrec_example.ipynb) - An example of using transformer-based SASRec model to generate recommendations.
193
+ 10. [10_bert4rec_example.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/10_bert4rec_example.ipynb) - An example of using transformer-based BERT4Rec model to generate recommendations.
194
+ 11. [11_sasrec_dataframes_comparison.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/11_sasrec_dataframes_comparison.ipynb) - speed comparison of using different frameworks (pandas, polars, pyspark) for data processing during SASRec training.
195
+ 12. [12_neural_ts_exp.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/12_neural_ts_exp.ipynb) - An example of using Neural Thompson Sampling bandit model (based on Wide&Deep architecture).
196
+ 13. [13_personalized_bandit_comparison.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/13_personalized_bandit_comparison.ipynb) - A comparison of context-free and contextual bandit models.
197
+ 14. [14_hierarchical_recommender.ipynb](https://github.com/sb-ai-lab/RePlay/blob/main/examples/14_hierarchical_recommender.ipynb) - An example of using HierarchicalRecommender with user-disjoint LinUCB.
190
198
 
191
199
  ### Videos and papers
192
200
  * **Video guides**:
193
201
  - [Replay for offline recommendations, AI Journey 2021](https://www.youtube.com/watch?v=ejQZKGAG0xs)
194
202
 
195
203
  * **Research papers**:
204
+ - [RePlay: a Recommendation Framework for Experimentation and Production Use](https://arxiv.org/abs/2409.07272) Alexey Vasilev, Anna Volodkevich, Denis Kulandin, Tatiana Bysheva, Anton Klenitskiy. In The 18th ACM Conference on Recommender Systems (RecSys '24)
196
205
  - [Turning Dross Into Gold Loss: is BERT4Rec really better than SASRec?](https://doi.org/10.1145/3604915.3610644) Anton Klenitskiy, Alexey Vasilev. In The 17th ACM Conference on Recommender Systems (RecSys '23)
197
206
  - [The Long Tail of Context: Does it Exist and Matter?](https://arxiv.org/abs/2210.01023). Konstantin Bauman, Alexey Vasilev, Alexander Tuzhilin. In Workshop on Context-Aware Recommender Systems (CARS) (RecSys '22)
198
207
  - [Multiobjective Evaluation of Reinforcement Learning Based Recommender Systems](https://doi.org/10.1145/3523227.3551485). Alexey Grishanov, Anastasia Ianina, Konstantin Vorontsov. In The 16th ACM Conference on Recommender Systems (RecSys '22)
@@ -202,3 +211,4 @@ print(metrics.results)
202
211
  ## 💡 Contributing to RePlay
203
212
 
204
213
  We welcome community contributions. For details please check our [contributing guidelines](CONTRIBUTING.md).
214
+
@@ -41,7 +41,7 @@ exclude = [
41
41
  "replay/conftest.py",
42
42
  "replay/experimental",
43
43
  ]
44
- version = "0.18.0"
44
+ version = "0.18.1"
45
45
 
46
46
  [tool.poetry.dependencies]
47
47
  python = ">=3.8.1, <3.12"
@@ -53,11 +53,13 @@ scipy = "^1.8.1"
53
53
  psutil = "~6.0.0"
54
54
  scikit-learn = "^1.0.2"
55
55
  pyarrow = ">=12.0.1"
56
+ openvino = {version = "~2024.3.0", optional = true}
57
+ onnx = {version = "~1.16.2", optional = true}
56
58
  pyspark = [
57
59
  {version = ">=3.4,<3.6", python = ">=3.11,<3.12", optional = true},
58
60
  {version = ">=3.0,<3.6", python = ">=3.8.1,<3.11", optional = true},
59
61
  ]
60
- torch = {version = ">=1.8, <=2.4.0", optional = true}
62
+ torch = {version = ">=1.8, <=2.5.0", optional = true}
61
63
  lightning = {version = ">=2.0.2, <=2.4.0", optional = true}
62
64
  pytorch-ranger = {version = "^0.1.1", optional = true}
63
65
  fixed-install-nmslib = "2.1.2"
@@ -66,7 +68,8 @@ hnswlib = "^0.7.0"
66
68
  [tool.poetry.extras]
67
69
  spark = ["pyspark"]
68
70
  torch = ["torch", "pytorch-ranger", "lightning"]
69
- all = ["pyspark", "torch", "pytorch-ranger", "lightning"]
71
+ torch-openvino = ["torch", "pytorch-ranger", "lightning", "openvino", "onnx"]
72
+ all = ["pyspark", "torch", "pytorch-ranger", "lightning", "openvino", "onnx"]
70
73
 
71
74
  [tool.poetry.group.dev.dependencies]
72
75
  jupyter = "~1.0.0"
@@ -85,10 +88,11 @@ myst-parser = "1.0.0"
85
88
  ghp-import = "2.1.0"
86
89
  docutils = "0.16"
87
90
  data-science-types = "0.2.23"
91
+ filelock = "~3.14.0"
88
92
 
89
93
  [tool.poetry-dynamic-versioning]
90
94
  enable = false
91
- format-jinja = """0.18.0{{ env['PACKAGE_SUFFIX'] }}"""
95
+ format-jinja = """0.18.1{{ env['PACKAGE_SUFFIX'] }}"""
92
96
  vcs = "git"
93
97
 
94
98
  [tool.ruff]
@@ -1,3 +1,3 @@
1
1
  """ RecSys library """
2
2
 
3
- __version__ = "0.18.0"
3
+ __version__ = "0.18.1"
@@ -458,13 +458,23 @@ class Dataset:
458
458
  if feature.feature_hint in [FeatureHint.ITEM_ID, FeatureHint.QUERY_ID]:
459
459
  return nunique(self._ids_feature_map[feature.feature_hint], column)
460
460
  assert feature.feature_source
461
+ if feature.feature_type == FeatureType.CATEGORICAL_LIST:
462
+ if self.is_spark:
463
+ data = (
464
+ self._feature_source_map[feature.feature_source]
465
+ .select(column)
466
+ .withColumn(column, sf.explode(column))
467
+ )
468
+ else:
469
+ data = self._feature_source_map[feature.feature_source][[column]].explode(column)
470
+ return nunique(data, column)
461
471
  return nunique(self._feature_source_map[feature.feature_source], column)
462
472
 
463
473
  return callback
464
474
 
465
475
  def _set_cardinality(self, features_list: Sequence[FeatureInfo]) -> None:
466
476
  for feature in features_list:
467
- if feature.feature_type == FeatureType.CATEGORICAL:
477
+ if feature.feature_type in [FeatureType.CATEGORICAL, FeatureType.CATEGORICAL_LIST]:
468
478
  feature._set_cardinality_callback(self._get_cardinality(feature))
469
479
 
470
480
  def _fill_feature_schema(self, feature_schema: FeatureSchema) -> FeatureSchema:
@@ -581,6 +591,7 @@ class Dataset:
581
591
  data: DataFrameLike,
582
592
  column: str,
583
593
  source: FeatureSource,
594
+ feature_type: FeatureType,
584
595
  cardinality: Optional[int],
585
596
  ) -> None:
586
597
  """
@@ -593,6 +604,16 @@ class Dataset:
593
604
  Option: Keep this criterion, but suggest the user to disable the check if he understands
594
605
  that the criterion will not pass.
595
606
  """
607
+ if feature_type == FeatureType.CATEGORICAL_LIST: # explode column if list
608
+ data = data.withColumn(column, sf.explode(column)) if self.is_spark else data[[column]].explode(column)
609
+
610
+ if self.is_pandas:
611
+ try:
612
+ data[column] = data[column].astype(int)
613
+ except Exception:
614
+ msg = f"IDs in {source.name}.{column} are not encoded. They are not int."
615
+ raise ValueError(msg)
616
+
596
617
  if self.is_pandas:
597
618
  is_int = np.issubdtype(dict(data.dtypes)[column], int)
598
619
  elif self.is_spark:
@@ -632,6 +653,7 @@ class Dataset:
632
653
  self.interactions,
633
654
  feature.column,
634
655
  FeatureSource.INTERACTIONS,
656
+ feature.feature_type,
635
657
  feature.cardinality,
636
658
  )
637
659
  if self.item_features is not None:
@@ -639,6 +661,7 @@ class Dataset:
639
661
  self.item_features,
640
662
  feature.column,
641
663
  FeatureSource.ITEM_FEATURES,
664
+ feature.feature_type,
642
665
  feature.cardinality,
643
666
  )
644
667
  elif feature.feature_hint == FeatureHint.QUERY_ID:
@@ -646,6 +669,7 @@ class Dataset:
646
669
  self.interactions,
647
670
  feature.column,
648
671
  FeatureSource.INTERACTIONS,
672
+ feature.feature_type,
649
673
  feature.cardinality,
650
674
  )
651
675
  if self.query_features is not None:
@@ -653,6 +677,7 @@ class Dataset:
653
677
  self.query_features,
654
678
  feature.column,
655
679
  FeatureSource.QUERY_FEATURES,
680
+ feature.feature_type,
656
681
  feature.cardinality,
657
682
  )
658
683
  else:
@@ -661,6 +686,7 @@ class Dataset:
661
686
  data,
662
687
  feature.column,
663
688
  feature.feature_source,
689
+ feature.feature_type,
664
690
  feature.cardinality,
665
691
  )
666
692
 
@@ -8,8 +8,8 @@ Contains classes for encoding categorical data
8
8
  import warnings
9
9
  from typing import Dict, Iterable, Iterator, Optional, Sequence, Set, Union
10
10
 
11
- from replay.data import Dataset, FeatureHint, FeatureSchema, FeatureSource
12
- from replay.preprocessing import LabelEncoder, LabelEncodingRule
11
+ from replay.data import Dataset, FeatureHint, FeatureSchema, FeatureSource, FeatureType
12
+ from replay.preprocessing import LabelEncoder, LabelEncodingRule, SequenceEncodingRule
13
13
  from replay.preprocessing.label_encoder import HandleUnknownStrategies
14
14
 
15
15
 
@@ -62,7 +62,10 @@ class DatasetLabelEncoder:
62
62
 
63
63
  self._fill_features_columns(dataset.feature_schema)
64
64
  for column, feature_info in dataset.feature_schema.categorical_features.items():
65
- encoding_rule = LabelEncodingRule(
65
+ encoding_rule_class = (
66
+ SequenceEncodingRule if feature_info.feature_type == FeatureType.CATEGORICAL_LIST else LabelEncodingRule
67
+ )
68
+ encoding_rule = encoding_rule_class(
66
69
  column, handle_unknown=self._handle_unknown_rule, default_value=self._default_value_rule
67
70
  )
68
71
  if feature_info.feature_hint == FeatureHint.QUERY_ID: