recnexteval-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. recnexteval/__init__.py +20 -0
  2. recnexteval/algorithms/__init__.py +99 -0
  3. recnexteval/algorithms/base.py +377 -0
  4. recnexteval/algorithms/baseline/__init__.py +10 -0
  5. recnexteval/algorithms/baseline/decay_popularity.py +110 -0
  6. recnexteval/algorithms/baseline/most_popular.py +72 -0
  7. recnexteval/algorithms/baseline/random.py +39 -0
  8. recnexteval/algorithms/baseline/recent_popularity.py +34 -0
  9. recnexteval/algorithms/itemknn/__init__.py +14 -0
  10. recnexteval/algorithms/itemknn/itemknn.py +119 -0
  11. recnexteval/algorithms/itemknn/itemknn_incremental.py +65 -0
  12. recnexteval/algorithms/itemknn/itemknn_incremental_movielens.py +95 -0
  13. recnexteval/algorithms/itemknn/itemknn_rolling.py +17 -0
  14. recnexteval/algorithms/itemknn/itemknn_static.py +31 -0
  15. recnexteval/algorithms/time_aware_item_knn/__init__.py +11 -0
  16. recnexteval/algorithms/time_aware_item_knn/base.py +248 -0
  17. recnexteval/algorithms/time_aware_item_knn/decay_functions.py +260 -0
  18. recnexteval/algorithms/time_aware_item_knn/ding_2005.py +52 -0
  19. recnexteval/algorithms/time_aware_item_knn/liu_2010.py +65 -0
  20. recnexteval/algorithms/time_aware_item_knn/similarity_functions.py +106 -0
  21. recnexteval/algorithms/time_aware_item_knn/top_k.py +61 -0
  22. recnexteval/algorithms/time_aware_item_knn/utils.py +47 -0
  23. recnexteval/algorithms/time_aware_item_knn/vaz_2013.py +50 -0
  24. recnexteval/algorithms/utils.py +51 -0
  25. recnexteval/datasets/__init__.py +109 -0
  26. recnexteval/datasets/base.py +316 -0
  27. recnexteval/datasets/config/__init__.py +113 -0
  28. recnexteval/datasets/config/amazon.py +188 -0
  29. recnexteval/datasets/config/base.py +72 -0
  30. recnexteval/datasets/config/lastfm.py +105 -0
  31. recnexteval/datasets/config/movielens.py +169 -0
  32. recnexteval/datasets/config/yelp.py +25 -0
  33. recnexteval/datasets/datasets/__init__.py +24 -0
  34. recnexteval/datasets/datasets/amazon.py +151 -0
  35. recnexteval/datasets/datasets/base.py +250 -0
  36. recnexteval/datasets/datasets/lastfm.py +121 -0
  37. recnexteval/datasets/datasets/movielens.py +93 -0
  38. recnexteval/datasets/datasets/test.py +46 -0
  39. recnexteval/datasets/datasets/yelp.py +103 -0
  40. recnexteval/datasets/metadata/__init__.py +58 -0
  41. recnexteval/datasets/metadata/amazon.py +68 -0
  42. recnexteval/datasets/metadata/base.py +38 -0
  43. recnexteval/datasets/metadata/lastfm.py +110 -0
  44. recnexteval/datasets/metadata/movielens.py +87 -0
  45. recnexteval/evaluators/__init__.py +189 -0
  46. recnexteval/evaluators/accumulator.py +167 -0
  47. recnexteval/evaluators/base.py +216 -0
  48. recnexteval/evaluators/builder/__init__.py +125 -0
  49. recnexteval/evaluators/builder/base.py +166 -0
  50. recnexteval/evaluators/builder/pipeline.py +111 -0
  51. recnexteval/evaluators/builder/stream.py +54 -0
  52. recnexteval/evaluators/evaluator_pipeline.py +287 -0
  53. recnexteval/evaluators/evaluator_stream.py +374 -0
  54. recnexteval/evaluators/state_management.py +310 -0
  55. recnexteval/evaluators/strategy.py +32 -0
  56. recnexteval/evaluators/util.py +124 -0
  57. recnexteval/matrix/__init__.py +48 -0
  58. recnexteval/matrix/exception.py +5 -0
  59. recnexteval/matrix/interaction_matrix.py +784 -0
  60. recnexteval/matrix/prediction_matrix.py +153 -0
  61. recnexteval/matrix/util.py +24 -0
  62. recnexteval/metrics/__init__.py +57 -0
  63. recnexteval/metrics/binary/__init__.py +4 -0
  64. recnexteval/metrics/binary/hit.py +49 -0
  65. recnexteval/metrics/core/__init__.py +10 -0
  66. recnexteval/metrics/core/base.py +126 -0
  67. recnexteval/metrics/core/elementwise_top_k.py +75 -0
  68. recnexteval/metrics/core/listwise_top_k.py +72 -0
  69. recnexteval/metrics/core/top_k.py +60 -0
  70. recnexteval/metrics/core/util.py +29 -0
  71. recnexteval/metrics/ranking/__init__.py +6 -0
  72. recnexteval/metrics/ranking/dcg.py +55 -0
  73. recnexteval/metrics/ranking/ndcg.py +78 -0
  74. recnexteval/metrics/ranking/precision.py +51 -0
  75. recnexteval/metrics/ranking/recall.py +42 -0
  76. recnexteval/models/__init__.py +4 -0
  77. recnexteval/models/base.py +69 -0
  78. recnexteval/preprocessing/__init__.py +37 -0
  79. recnexteval/preprocessing/filter.py +181 -0
  80. recnexteval/preprocessing/preprocessor.py +137 -0
  81. recnexteval/registries/__init__.py +67 -0
  82. recnexteval/registries/algorithm.py +68 -0
  83. recnexteval/registries/base.py +131 -0
  84. recnexteval/registries/dataset.py +37 -0
  85. recnexteval/registries/metric.py +57 -0
  86. recnexteval/settings/__init__.py +127 -0
  87. recnexteval/settings/base.py +414 -0
  88. recnexteval/settings/exception.py +8 -0
  89. recnexteval/settings/leave_n_out_setting.py +48 -0
  90. recnexteval/settings/processor.py +115 -0
  91. recnexteval/settings/schema.py +11 -0
  92. recnexteval/settings/single_time_point_setting.py +111 -0
  93. recnexteval/settings/sliding_window_setting.py +153 -0
  94. recnexteval/settings/splitters/__init__.py +14 -0
  95. recnexteval/settings/splitters/base.py +57 -0
  96. recnexteval/settings/splitters/n_last.py +39 -0
  97. recnexteval/settings/splitters/n_last_timestamp.py +76 -0
  98. recnexteval/settings/splitters/timestamp.py +82 -0
  99. recnexteval/settings/util.py +0 -0
  100. recnexteval/utils/__init__.py +115 -0
  101. recnexteval/utils/json_to_csv_converter.py +128 -0
  102. recnexteval/utils/logging_tools.py +159 -0
  103. recnexteval/utils/path.py +155 -0
  104. recnexteval/utils/url_certificate_installer.py +54 -0
  105. recnexteval/utils/util.py +166 -0
  106. recnexteval/utils/uuid_util.py +7 -0
  107. recnexteval/utils/yaml_tool.py +65 -0
  108. recnexteval-0.1.0.dist-info/METADATA +85 -0
  109. recnexteval-0.1.0.dist-info/RECORD +110 -0
  110. recnexteval-0.1.0.dist-info/WHEEL +4 -0

recnexteval/algorithms/time_aware_item_knn/decay_functions.py
@@ -0,0 +1,260 @@
+ # RecPack, An Experimentation Toolkit for Top-N Recommendation
+ # Copyright (C) 2020 Froomle N.V.
+ # License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
+ # Author:
+ # Lien Michiels
+ # Robin Verachtert
+
+ import numpy as np
+ from numpy.typing import ArrayLike
+
+
+ class DecayFunction:
+     def __call__(self, time_distances: ArrayLike) -> ArrayLike:
+         """Apply the decay.
+
+         :param time_distances: Array of distances to be decayed.
+         :type time_distances: ArrayLike
+         :returns: Array of event ages to which decays have been applied.
+         :rtype: ArrayLike
+         """
+         raise NotImplementedError()
+
+
+ class ExponentialDecay(DecayFunction):
+     """Applies exponential decay.
+
+     For each value x in ``time_distances`` the decayed value is computed as
+
+     .. math::
+
+         f(x) = e^{-\\alpha \\cdot x}
+
+     where :math:`\\alpha` is the decay parameter.
+
+     :param decay: Exponential decay parameter, should be in the [0, 1] interval.
+     :type decay: float
+     """
+
+     @classmethod
+     def validate_decay(cls, decay: float) -> None:
+         """Verify that the decay parameter is in the supported range for this decay function."""
+         if not (0 <= decay <= 1):
+             raise ValueError(f"Decay parameter = {decay} is not in the supported range: [0, 1].")
+
+     def __init__(self, decay: float):
+         self.validate_decay(decay)
+         self.decay = decay
+
+     def __call__(self, time_distances: ArrayLike) -> ArrayLike:
+         """Apply the decay function.
+
+         :param time_distances: Array of distances to be decayed.
+         :type time_distances: ArrayLike
+         :returns: The decayed time array.
+         :rtype: ArrayLike
+         """
+         return np.exp(-self.decay * time_distances)
+
+
+ class ConvexDecay(DecayFunction):
+     """Applies a convex decay function.
+
+     For each value x in ``time_distances`` the decayed value is computed as
+
+     .. math::
+
+         f(x) = \\alpha^{x}
+
+     where :math:`\\alpha` is the decay parameter.
+
+     :param decay: The decay parameter, should be in the ]0, 1] interval.
+     :type decay: float
+     """
+
+     @classmethod
+     def validate_decay(cls, decay: float) -> None:
+         """Verify that the decay parameter is in the supported range for this decay function."""
+         if not (0 < decay <= 1):
+             raise ValueError(f"Decay parameter = {decay} is not in the supported range: ]0, 1].")
+
+     def __init__(self, decay: float):
+         self.validate_decay(decay)
+         self.decay = decay
+
+     def __call__(self, time_distances: ArrayLike) -> ArrayLike:
+         """Apply the decay function.
+
+         :param time_distances: Array of distances to be decayed.
+         :type time_distances: ArrayLike
+         :returns: The decayed time array.
+         :rtype: ArrayLike
+         """
+         return np.power(self.decay, time_distances)
+
+
+ class ConcaveDecay(DecayFunction):
+     """Applies a concave decay function.
+
+     For each value x in ``time_distances`` the decayed value is computed as
+
+     .. math::
+
+         f(x) = 1 - \\alpha^{1-\\frac{x}{N}}
+
+     where :math:`\\alpha` is the decay parameter and :math:`N` is the ``max_distance`` parameter.
+
+     :param decay: The decay parameter, should be in the ]0, 1] interval.
+     :type decay: float
+     :param max_distance: Normalizing parameter used to rescale distances to the [0, 1] interval.
+     :type max_distance: float
+     """
+
+     @classmethod
+     def validate_decay(cls, decay: float) -> None:
+         """Verify that the decay parameter is in the supported range for this decay function."""
+         if not (0 < decay <= 1):
+             raise ValueError(f"Decay parameter = {decay} is not in the supported range: ]0, 1].")
+
+     def __init__(self, decay: float, max_distance: float):
+         self.validate_decay(decay)
+         self.decay = decay
+         self.max_distance = max_distance
+
+     def __call__(self, time_distances: ArrayLike) -> ArrayLike:
+         """Apply the decay function.
+
+         :param time_distances: Array of distances to be decayed.
+         :type time_distances: ArrayLike
+         :returns: The decayed array.
+         :rtype: ArrayLike
+         """
+         if (time_distances > self.max_distance).any():
+             raise ValueError(
+                 "At least one of the distances is bigger than the specified max_distance."
+             )
+         return 1 - np.power(self.decay, 1 - (time_distances / self.max_distance))
+
+
+ class LogDecay(DecayFunction):
+     """Applies a logarithmic decay function.
+
+     For each value x in ``time_distances`` the decayed value is computed as
+
+     .. math::
+
+         f(x) = \\log_{\\alpha}((\\alpha - 1)(1 - \\frac{x}{N}) + 1)
+
+     where :math:`\\alpha` is the decay parameter and :math:`N` is the ``max_distance`` parameter.
+
+     :param decay: The decay parameter, should be in the ]1, inf[ interval.
+     :type decay: float
+     :param max_distance: Normalizing parameter used to rescale distances to the [0, 1] interval.
+     :type max_distance: float
+     """
+
+     @classmethod
+     def validate_decay(cls, decay: float) -> None:
+         """Verify that the decay parameter is in the supported range for this decay function."""
+         if not (1 < decay):
+             raise ValueError(f"Decay parameter = {decay} is not in the supported range: ]1, inf[.")
+
+     def __init__(self, decay: float, max_distance: float):
+         self.validate_decay(decay)
+         self.decay = decay
+         self.max_distance = max_distance
+
+     def __call__(self, time_distances: ArrayLike) -> ArrayLike:
+         """Apply the decay function.
+
+         :param time_distances: Array of distances to be decayed.
+         :type time_distances: ArrayLike
+         :returns: The decayed time array.
+         :rtype: ArrayLike
+         """
+         if (time_distances > self.max_distance).any():
+             raise ValueError(
+                 "At least one of the distances is bigger than the specified max_distance."
+             )
+         return np.log(((self.decay - 1) * (1 - time_distances / self.max_distance)) + 1) / np.log(
+             self.decay
+         )
+
+
+ class LinearDecay(DecayFunction):
+     """Applies a linear decay function.
+
+     For each value x in ``time_distances`` the decayed value is computed as
+
+     .. math::
+
+         f(x) = \\max(1 - \\frac{x}{N} \\cdot \\alpha, 0)
+
+     where :math:`\\alpha` is the decay parameter and :math:`N` is the ``max_distance`` parameter.
+
+     :param decay: The decay parameter, should be in the [0, inf[ interval.
+     :type decay: float
+     :param max_distance: Normalizing parameter used to rescale distances to the [0, 1] interval.
+     :type max_distance: float
+     """
+
+     @classmethod
+     def validate_decay(cls, decay: float) -> None:
+         if not (0 <= decay):
+             raise ValueError(f"Decay parameter = {decay} is not in the supported range: [0, +inf[.")
+
+     def __init__(self, decay: float, max_distance: float):
+         self.validate_decay(decay)
+         self.decay = decay
+         self.max_distance = max_distance
+
+     def __call__(self, time_distances: ArrayLike) -> ArrayLike:
+         """Apply the decay function.
+
+         :param time_distances: Array of distances to be decayed.
+         :type time_distances: ArrayLike
+         :returns: The decayed time array.
+         :rtype: ArrayLike
+         """
+         if (time_distances > self.max_distance).any():
+             raise ValueError(
+                 "At least one of the distances is bigger than the specified max_distance."
+             )
+         results = 1 - (time_distances / self.max_distance) * self.decay
+         results[results < 0] = 0
+         return results
+
+
+ class InverseDecay(DecayFunction):
+     """Invert the scores.
+
+     For each value x in ``time_distances`` the decayed value is computed as
+
+     .. math::
+
+         f(x) = \\frac{1}{x}
+     """
+
+     def __call__(self, time_distances: ArrayLike) -> ArrayLike:
+         """Apply the decay function.
+
+         :param time_distances: Array of distances to be decayed.
+         :type time_distances: ArrayLike
+         :returns: The decayed time array.
+         :rtype: ArrayLike
+         """
+         results = time_distances.astype(float).copy()
+         results[results > 0] = 1 / results[results > 0]
+         results[results == 0] = 1
+         return results
+
+
+ class NoDecay(ExponentialDecay):
+     """Turns the array into a binary array."""
+
+     def __init__(self):
+         super().__init__(0)
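
A quick illustration of the decay interface above (a minimal sketch; the array values and parameters are invented for the example, and the import path simply mirrors the file shown): each callable maps an array of event ages, in seconds, to decayed weights.

import numpy as np
# Hypothetical usage example; import path mirrors the file above.
from recnexteval.algorithms.time_aware_item_knn.decay_functions import (
    ExponentialDecay,
    LinearDecay,
)

# Ages of three interactions: now, one day ago, one week ago (in seconds).
ages = np.array([0.0, 86_400.0, 604_800.0])

exp_decay = ExponentialDecay(decay=1 / 86_400)  # value divided by e per elapsed day
print(exp_decay(ages))                          # -> [1.0, ~0.3679, ~0.0009]

lin_decay = LinearDecay(decay=1.0, max_distance=604_800.0)
print(lin_decay(ages))                          # -> [1.0, ~0.857, 0.0]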

recnexteval/algorithms/time_aware_item_knn/ding_2005.py
@@ -0,0 +1,52 @@
+ # Adapted from RecPack, An Experimentation Toolkit for Top-N Recommendation
+ # Copyright (C) 2020 Froomle N.V.
+ # License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
+ # Author:
+ # Lien Michiels
+ # Robin Verachtert
+
+ from recnexteval.algorithms.time_aware_item_knn.base import TARSItemKNN
+
+
+ class TARSItemKNNDing(TARSItemKNN):
+     """Time-aware variant of ItemKNN that uses an exponential decay function at prediction time and cosine similarity.
+
+     Algorithm as presented in
+     Yi Ding and Xue Li. 2005.
+     Time weight collaborative filtering.
+     In Proceedings of the 14th ACM international conference on Information and knowledge management (CIKM '05).
+     Association for Computing Machinery, New York, NY, USA, 485–492.
+     https://doi.org/10.1145/1099554.1099689
+
+     Computation of the similarity matrix is the same as in plain ItemKNN.
+     At prediction time, however, a user's older interactions are given less weight in the final prediction score.
+
+     .. math::
+
+         \\text{sim}(u, i) = \\sum\\limits_{j \\in X_u} e^{-\\alpha \\cdot \\delta t_{u,j}} \\cdot \\text{sim}(i, j)
+
+     where :math:`\\alpha` is the ``predict_decay`` parameter.
+
+     :param K: How many neighbours to use per item.
+         Make sure to pick a value below the number of columns of the matrix to fit on.
+         Defaults to 200.
+     :type K: int, optional
+     :param pad_with_popularity: Whether to pad the similarity matrix with the RecentPop algorithm.
+         Defaults to True.
+     :type pad_with_popularity: bool, optional
+     :param predict_decay: Defines the decay scaling used during prediction.
+         Defaults to 1 / (24 * 3600), which means that for every day since an interaction, its value is divided by e.
+     :type predict_decay: float, optional
+     :param similarity: Which similarity measure to use. Defaults to ``"cosine"``.
+         ``["cosine", "conditional_probability"]`` are supported.
+     :type similarity: str, optional
+
+     This code is adapted from RecPack :cite:`recpack`.
+     """
+
+     SUPPORTED_SIMILARITIES = ["cosine", "conditional_probability"]
+
+     def __init__(self, K: int = 200, pad_with_popularity: bool = True, predict_decay: float = 1 / (24 * 3600), similarity: str = "cosine"):
+         super().__init__(K=K, pad_with_popularity=pad_with_popularity, fit_decay=0, predict_decay=predict_decay, similarity=similarity, decay_function="exponential")

recnexteval/algorithms/time_aware_item_knn/liu_2010.py
@@ -0,0 +1,65 @@
+ # Adapted from RecPack, An Experimentation Toolkit for Top-N Recommendation
+ # Copyright (C) 2020 Froomle N.V.
+ # License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
+ # Author:
+ # Lien Michiels
+ # Robin Verachtert
+
+ """Module with time-dependent ItemKNN implementations."""
+
+ from recnexteval.algorithms.time_aware_item_knn.base import TARSItemKNN
+
+
+ class TARSItemKNNLiu(TARSItemKNN):
+     """Time-aware variant of ItemKNN that uses an exponential decay function and cosine similarity.
+
+     Algorithm as described in
+     Nathan N. Liu, Min Zhao, Evan Xiang, and Qiang Yang. 2010.
+     Online evolutionary collaborative filtering.
+     In Proceedings of the fourth ACM conference on Recommender systems (RecSys '10).
+     Association for Computing Machinery, New York, NY, USA, 95–102.
+     https://doi.org/10.1145/1864708.1864729
+
+     The algorithm uses an exponential decay function:
+
+     .. math::
+
+         \\Gamma(x) = e^{-\\alpha \\cdot x}
+
+     where :math:`\\alpha` is the decay scaling parameter,
+     and x is the time between the maximal timestamp in the matrix
+     and the timestamp of the event.
+
+     Similarity is computed on this weighted matrix, using cosine similarity.
+     At prediction time a user's history is weighted using the same formula with a different alpha.
+     This weighted history is then multiplied with the precomputed similarity matrix.
+
+     :param K: How many neighbours to use per item.
+         Make sure to pick a value below the number of columns of the matrix to fit on.
+         Defaults to 200.
+     :type K: int, optional
+     :param pad_with_popularity: Whether to pad the similarity matrix with the RecentPop algorithm.
+         Defaults to True.
+     :type pad_with_popularity: bool, optional
+     :param fit_decay: Defines the decay scaling used during model fitting.
+         Defaults to 1 / (24 * 3600), which means that for every day since an interaction, its value is divided by e.
+     :type fit_decay: float, optional
+     :param predict_decay: Defines the decay scaling used during prediction.
+         Defaults to 1 / (24 * 3600), which means that for every day since an interaction, its value is divided by e.
+     :type predict_decay: float, optional
+
+     This code is adapted from RecPack :cite:`recpack`.
+     """
+
+     def __init__(
+         self,
+         K: int = 200,
+         pad_with_popularity: bool = True,
+         fit_decay: float = 1 / (24 * 3600),
+         predict_decay: float = 1 / (24 * 3600),
+     ):
+         super().__init__(
+             K=K, pad_with_popularity=pad_with_popularity, fit_decay=fit_decay, predict_decay=predict_decay, similarity="cosine", decay_function="exponential"
+         )

recnexteval/algorithms/time_aware_item_knn/similarity_functions.py
@@ -0,0 +1,106 @@
+ # RecPack, An Experimentation Toolkit for Top-N Recommendation
+ # Copyright (C) 2020 Froomle N.V.
+ # License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
+ # Author:
+ # Lien Michiels
+ # Robin Verachtert
+
+
+ from scipy.sparse import csr_matrix, diags
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ from recnexteval.utils import invert, to_binary
+
+
+ def compute_conditional_probability(X: csr_matrix, pop_discount: float = 0) -> csr_matrix:
+     """Compute a conditional-probability-like similarity.
+
+     Computation uses equation (3) from the original ItemKNN paper:
+     Deshpande, Mukund, and George Karypis.
+     "Item-based top-N recommendation algorithms."
+
+     .. math::
+
+         sim(i,j) = \\frac{\\sum\\limits_{u \\in U} \\mathbb{I}_{u,i} X_{u,j}}{Freq(i) \\times Freq(j)^{\\alpha}}
+
+     where :math:`\\mathbb{I}_{u,i}` is 1 if user u has visited item i, and 0 otherwise,
+     and :math:`\\alpha` is the ``pop_discount`` parameter.
+     Note that this is a non-symmetric similarity measure.
+     Given that X is a binary matrix and alpha is set to 0,
+     this simplifies to pure conditional probability:
+
+     .. math::
+
+         sim(i,j) = \\frac{Freq(i \\land j)}{Freq(i)}
+
+     :param X: user x item matrix with scores per user, item pair.
+     :type X: csr_matrix
+     :param pop_discount: Parameter defining the popularity discount. Defaults to 0.
+     :type pop_discount: float, optional
+     """
+     # Matrix with co_mat[i, j] = sum over users u of (1_{u,i} * X_{u,j}).
+     # If the input matrix is binary, this is the co-occurrence count matrix.
+     co_mat = to_binary(X).T @ X
+
+     # Compute the inverse of the item frequencies.
+     A = invert(diags(to_binary(X).sum(axis=0).A[0]).tocsr())
+
+     if pop_discount:
+         # co_mat is weighted by both the frequency of item i
+         # and the frequency of item j raised to the pop_discount power.
+         # If pop_discount = 1, this similarity is symmetric again.
+         item_cond_prob_similarities = A @ co_mat @ A.power(pop_discount)
+     else:
+         # Weight co_mat with the number of occurrences of item i.
+         item_cond_prob_similarities = A @ co_mat
+
+     # Set the diagonal to 0, because we don't support self-similarity.
+     item_cond_prob_similarities.setdiag(0)
+
+     return item_cond_prob_similarities
+
+
+ def compute_cosine_similarity(X: csr_matrix) -> csr_matrix:
+     """Compute the cosine similarity between the items in the matrix.
+
+     Self-similarity is removed.
+
+     :param X: user x item matrix with scores per user, item pair.
+     :type X: csr_matrix
+     :return: similarity matrix
+     :rtype: csr_matrix
+     """
+     # Transpose X, otherwise we would be computing a user KNN.
+     item_cosine_similarities = cosine_similarity(X.T, dense_output=False)
+     # Set the diagonal to 0, because we don't want to support self-similarity.
+     item_cosine_similarities.setdiag(0)
+
+     return item_cosine_similarities
+
+
+ def compute_pearson_similarity(X: csr_matrix) -> csr_matrix:
+     """Compute the Pearson correlation as a similarity between each pair of items in the matrix.
+
+     Self-similarity is removed.
+     When computing the similarity, the average of the nonzero entries per item is used for centering.
+
+     :param X: Rating or pseudo-rating matrix.
+     :type X: csr_matrix
+     :return: similarity matrix.
+     :rtype: csr_matrix
+     """
+     if (X == 1).sum() == X.nnz:
+         raise ValueError("Pearson similarity can not be computed on a binary matrix.")
+
+     count_per_item = (X > 0).sum(axis=0).A
+
+     avg_per_item = X.sum(axis=0).A.astype(float)
+
+     avg_per_item[count_per_item > 0] = (
+         avg_per_item[count_per_item > 0] / count_per_item[count_per_item > 0]
+     )
+
+     X = X - (X > 0).multiply(avg_per_item)
+
+     # On the centered matrix, the Pearson correlation is just the cosine similarity.
+     return compute_cosine_similarity(X)
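
A small, self-contained check of the cosine routine above (a sketch; the toy matrix is invented): on a 3-user x 3-item binary matrix, items that co-occur in the same users' histories get a nonzero similarity, and the diagonal is zeroed out.

import numpy as np
from scipy.sparse import csr_matrix
# Hypothetical usage; import path mirrors the file above.
from recnexteval.algorithms.time_aware_item_knn.similarity_functions import (
    compute_cosine_similarity,
)

# 3 users x 3 items: users 0 and 1 both interacted with items 0 and 1.
X = csr_matrix(np.array([
    [1, 1, 0],
    [1, 1, 0],
    [0, 0, 1],
]))

S = compute_cosine_similarity(X)
print(S.toarray())
# [[0. 1. 0.]
#  [1. 0. 0.]
#  [0. 0. 0.]]   <- self-similarity removed on the diagonal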

recnexteval/algorithms/time_aware_item_knn/top_k.py
@@ -0,0 +1,61 @@
+ # RecPack, An Experimentation Toolkit for Top-N Recommendation
+ # Copyright (C) 2020 Froomle N.V.
+ # License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
+ # Author:
+ # Lien Michiels
+ # Robin Verachtert
+
+ from typing import Optional
+
+ import numpy as np
+ from scipy.sparse import csr_matrix
+
+
+ def get_top_K_ranks(X: csr_matrix, K: Optional[int] = None) -> csr_matrix:
+     """Return a matrix of ranks assigned to the K largest values in every row of X.
+
+     Selects the K largest values of every row in X and assigns a rank to each,
+     with rank 1 for the largest value.
+
+     :param X: Matrix from which to select the K largest values in every row.
+     :type X: csr_matrix
+     :param K: Number of values to select. If None, all values are ranked.
+     :type K: int, optional
+     :return: Matrix with at most K ranked values per row.
+     :rtype: csr_matrix
+     """
+     U, I, V = [], [], []
+     for row_ix, (le, ri) in enumerate(zip(X.indptr[:-1], X.indptr[1:])):
+         K_row_pick = min(K, ri - le) if K is not None else ri - le
+
+         if K_row_pick != 0:
+             top_k_row = X.indices[
+                 le + np.argpartition(X.data[le:ri], list(range(-K_row_pick, 0)))[-K_row_pick:]
+             ]
+
+             for rank, col_ix in enumerate(reversed(top_k_row)):
+                 U.append(row_ix)
+                 I.append(col_ix)
+                 V.append(rank + 1)
+
+     X_top_K = csr_matrix((V, (U, I)), shape=X.shape)
+
+     return X_top_K
+
+
+ def get_top_K_values(X: csr_matrix, K: Optional[int] = None) -> csr_matrix:
+     """Return a matrix that keeps only the K largest values of every row in X.
+
+     Selects the top-K items for every user (which is equal to the K nearest neighbours).
+     In case of a tie for the last position, the tied item with the largest index is used.
+
+     :param X: Matrix from which to select the K largest values in every row.
+     :type X: csr_matrix
+     :param K: Number of values to select. If None, all values are kept.
+     :type K: int, optional
+     :return: Matrix with at most K values per row.
+     :rtype: csr_matrix
+     """
+     top_K_ranks = get_top_K_ranks(X, K)
+     top_K_ranks[top_K_ranks > 0] = 1  # ranks to binary
+
+     return top_K_ranks.multiply(X)  # elementwise multiplication
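
To make the ranking behaviour of the helpers above concrete, a minimal sketch (toy scores invented for the example): keep the two largest scores per row, and either rank them or zero out the rest.

import numpy as np
from scipy.sparse import csr_matrix
# Hypothetical usage; import path mirrors the file above.
from recnexteval.algorithms.time_aware_item_knn.top_k import get_top_K_ranks, get_top_K_values

scores = csr_matrix(np.array([
    [0.1, 0.9, 0.5, 0.0],
    [0.0, 0.2, 0.0, 0.7],
]))

print(get_top_K_ranks(scores, K=2).toarray())
# [[0 1 2 0]     rank 1 marks the largest value in each row
#  [0 2 0 1]]
print(get_top_K_values(scores, K=2).toarray())
# [[0.  0.9 0.5 0. ]
#  [0.  0.2 0.  0.7]]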

recnexteval/algorithms/time_aware_item_knn/utils.py
@@ -0,0 +1,47 @@
+ # RecPack, An Experimentation Toolkit for Top-N Recommendation
+ # Copyright (C) 2020 Froomle N.V.
+ # License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
+ # Author:
+ # Lien Michiels
+ # Robin Verachtert
+
+ import logging
+
+ import numpy as np
+ from scipy.sparse import csr_matrix
+
+
+ EPSILON = 1e-13
+
+ logger = logging.getLogger(__name__)
+
+
+ def invert(x: np.ndarray | csr_matrix) -> np.ndarray | csr_matrix:
+     """Invert the nonzero entries of an array or sparse matrix elementwise.
+
+     Zero entries stay zero.
+
+     :param x: Array or sparse matrix to invert.
+     :type x: np.ndarray | csr_matrix
+     :return: Elementwise inverse of x, of the same type as the input.
+     :rtype: np.ndarray | csr_matrix
+     """
+     if isinstance(x, np.ndarray):
+         ret = np.zeros(x.shape)
+     elif isinstance(x, csr_matrix):
+         ret = csr_matrix(x.shape)
+     else:
+         raise TypeError("Unsupported type for argument x.")
+     ret[x.nonzero()] = 1 / x[x.nonzero()]
+     return ret
+
+
+ def to_binary(X: csr_matrix) -> csr_matrix:
+     """Convert a matrix to binary by setting all nonzero values to 1.
+
+     :param X: Matrix to convert to binary.
+     :type X: csr_matrix
+     :return: Binary matrix.
+     :rtype: csr_matrix
+     """
+     X_binary = X.astype(bool).astype(X.dtype)
+
+     return X_binary
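
A quick sketch of the two helpers above (values invented): ``invert`` flips only the nonzero entries and leaves zeros alone, and ``to_binary`` maps every nonzero value to 1 while keeping the original dtype.

import numpy as np
from scipy.sparse import csr_matrix
# Hypothetical usage; import path mirrors the file above.
from recnexteval.algorithms.time_aware_item_knn.utils import invert, to_binary

x = np.array([0.0, 2.0, 4.0])
print(invert(x))               # [0.   0.5  0.25]

M = csr_matrix(np.array([[0.0, 3.0], [5.0, 0.0]]))
print(to_binary(M).toarray())  # [[0. 1.]
                               #  [1. 0.]]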

recnexteval/algorithms/time_aware_item_knn/vaz_2013.py
@@ -0,0 +1,50 @@
+ # Adapted from RecPack, An Experimentation Toolkit for Top-N Recommendation
+ # Copyright (C) 2020 Froomle N.V.
+ # License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
+ # Author:
+ # Lien Michiels
+ # Robin Verachtert
+
+ from recnexteval.algorithms.time_aware_item_knn.base import TARSItemKNN
+
+
+ class TARSItemKNNVaz(TARSItemKNN):
+     """Time-aware variant of ItemKNN that uses an exponential decay function and Pearson similarity.
+
+     Algorithm as described in
+     "Understanding the Temporal Dynamics of Recommendations across Different Rating Scales".
+     Paula Cristina Vaz, Ricardo Ribeiro, David Martins de Matos.
+     Late-Breaking Results, Project Papers and Workshop Proceedings of the 21st Conference
+     on User Modeling, Adaptation, and Personalization. Rome, Italy, June 10-14, 2013.
+
+     The algorithm uses an exponential decay function:
+
+     .. math::
+
+         \\Gamma(x) = e^{-\\alpha \\cdot x}
+
+     where :math:`\\alpha` is the decay scaling parameter,
+     and x is the time between the maximal timestamp in the matrix
+     and the timestamp of the event.
+
+     :param K: How many neighbours to use per item.
+         Make sure to pick a value below the number of columns of the matrix to fit on.
+         Defaults to 200.
+     :type K: int, optional
+     :param pad_with_popularity: Whether to pad the similarity matrix with the RecentPop algorithm.
+         Defaults to True.
+     :type pad_with_popularity: bool, optional
+     :param fit_decay: Defines the decay scaling used during model fitting.
+         Defaults to 1 / (24 * 3600), which means that for every day since an interaction, its value is divided by e.
+     :type fit_decay: float, optional
+     :param predict_decay: Defines the decay scaling used during prediction.
+         Defaults to 1 / (24 * 3600), which means that for every day since an interaction, its value is divided by e.
+     :type predict_decay: float, optional
+
+     This code is adapted from RecPack :cite:`recpack`.
+     """
+
+     def __init__(self, K: int = 200, pad_with_popularity: bool = True, fit_decay: float = 1 / (24 * 3600), predict_decay: float = 1 / (24 * 3600)):
+         super().__init__(K, pad_with_popularity=pad_with_popularity, fit_decay=fit_decay, predict_decay=predict_decay, similarity="pearson", decay_function="exponential")
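
Finally, a hedged sketch of how the three TARSItemKNN variants in this diff (Ding, Liu, Vaz) might be instantiated. Parameter values are illustrative only; the constructor signatures come from the files above, while the fit/predict workflow lives in the shared TARSItemKNN base class, which is not part of this excerpt.

# Illustrative only; constructor signatures taken from the files shown above.
from recnexteval.algorithms.time_aware_item_knn.ding_2005 import TARSItemKNNDing
from recnexteval.algorithms.time_aware_item_knn.liu_2010 import TARSItemKNNLiu
from recnexteval.algorithms.time_aware_item_knn.vaz_2013 import TARSItemKNNVaz

ONE_DAY = 24 * 3600

algorithms = {
    # Decay applied only at prediction time, cosine similarity.
    "ding2005": TARSItemKNNDing(K=200, predict_decay=1 / ONE_DAY),
    # Decay applied at both fit and predict time, cosine similarity.
    "liu2010": TARSItemKNNLiu(K=200, fit_decay=1 / ONE_DAY, predict_decay=1 / ONE_DAY),
    # Decay applied at both fit and predict time, Pearson similarity.
    "vaz2013": TARSItemKNNVaz(K=200, fit_decay=1 / ONE_DAY, predict_decay=1 / ONE_DAY),
}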