recnexteval 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recnexteval/__init__.py +20 -0
- recnexteval/algorithms/__init__.py +99 -0
- recnexteval/algorithms/base.py +377 -0
- recnexteval/algorithms/baseline/__init__.py +10 -0
- recnexteval/algorithms/baseline/decay_popularity.py +110 -0
- recnexteval/algorithms/baseline/most_popular.py +72 -0
- recnexteval/algorithms/baseline/random.py +39 -0
- recnexteval/algorithms/baseline/recent_popularity.py +34 -0
- recnexteval/algorithms/itemknn/__init__.py +14 -0
- recnexteval/algorithms/itemknn/itemknn.py +119 -0
- recnexteval/algorithms/itemknn/itemknn_incremental.py +65 -0
- recnexteval/algorithms/itemknn/itemknn_incremental_movielens.py +95 -0
- recnexteval/algorithms/itemknn/itemknn_rolling.py +17 -0
- recnexteval/algorithms/itemknn/itemknn_static.py +31 -0
- recnexteval/algorithms/time_aware_item_knn/__init__.py +11 -0
- recnexteval/algorithms/time_aware_item_knn/base.py +248 -0
- recnexteval/algorithms/time_aware_item_knn/decay_functions.py +260 -0
- recnexteval/algorithms/time_aware_item_knn/ding_2005.py +52 -0
- recnexteval/algorithms/time_aware_item_knn/liu_2010.py +65 -0
- recnexteval/algorithms/time_aware_item_knn/similarity_functions.py +106 -0
- recnexteval/algorithms/time_aware_item_knn/top_k.py +61 -0
- recnexteval/algorithms/time_aware_item_knn/utils.py +47 -0
- recnexteval/algorithms/time_aware_item_knn/vaz_2013.py +50 -0
- recnexteval/algorithms/utils.py +51 -0
- recnexteval/datasets/__init__.py +109 -0
- recnexteval/datasets/base.py +316 -0
- recnexteval/datasets/config/__init__.py +113 -0
- recnexteval/datasets/config/amazon.py +188 -0
- recnexteval/datasets/config/base.py +72 -0
- recnexteval/datasets/config/lastfm.py +105 -0
- recnexteval/datasets/config/movielens.py +169 -0
- recnexteval/datasets/config/yelp.py +25 -0
- recnexteval/datasets/datasets/__init__.py +24 -0
- recnexteval/datasets/datasets/amazon.py +151 -0
- recnexteval/datasets/datasets/base.py +250 -0
- recnexteval/datasets/datasets/lastfm.py +121 -0
- recnexteval/datasets/datasets/movielens.py +93 -0
- recnexteval/datasets/datasets/test.py +46 -0
- recnexteval/datasets/datasets/yelp.py +103 -0
- recnexteval/datasets/metadata/__init__.py +58 -0
- recnexteval/datasets/metadata/amazon.py +68 -0
- recnexteval/datasets/metadata/base.py +38 -0
- recnexteval/datasets/metadata/lastfm.py +110 -0
- recnexteval/datasets/metadata/movielens.py +87 -0
- recnexteval/evaluators/__init__.py +189 -0
- recnexteval/evaluators/accumulator.py +167 -0
- recnexteval/evaluators/base.py +216 -0
- recnexteval/evaluators/builder/__init__.py +125 -0
- recnexteval/evaluators/builder/base.py +166 -0
- recnexteval/evaluators/builder/pipeline.py +111 -0
- recnexteval/evaluators/builder/stream.py +54 -0
- recnexteval/evaluators/evaluator_pipeline.py +287 -0
- recnexteval/evaluators/evaluator_stream.py +374 -0
- recnexteval/evaluators/state_management.py +310 -0
- recnexteval/evaluators/strategy.py +32 -0
- recnexteval/evaluators/util.py +124 -0
- recnexteval/matrix/__init__.py +48 -0
- recnexteval/matrix/exception.py +5 -0
- recnexteval/matrix/interaction_matrix.py +784 -0
- recnexteval/matrix/prediction_matrix.py +153 -0
- recnexteval/matrix/util.py +24 -0
- recnexteval/metrics/__init__.py +57 -0
- recnexteval/metrics/binary/__init__.py +4 -0
- recnexteval/metrics/binary/hit.py +49 -0
- recnexteval/metrics/core/__init__.py +10 -0
- recnexteval/metrics/core/base.py +126 -0
- recnexteval/metrics/core/elementwise_top_k.py +75 -0
- recnexteval/metrics/core/listwise_top_k.py +72 -0
- recnexteval/metrics/core/top_k.py +60 -0
- recnexteval/metrics/core/util.py +29 -0
- recnexteval/metrics/ranking/__init__.py +6 -0
- recnexteval/metrics/ranking/dcg.py +55 -0
- recnexteval/metrics/ranking/ndcg.py +78 -0
- recnexteval/metrics/ranking/precision.py +51 -0
- recnexteval/metrics/ranking/recall.py +42 -0
- recnexteval/models/__init__.py +4 -0
- recnexteval/models/base.py +69 -0
- recnexteval/preprocessing/__init__.py +37 -0
- recnexteval/preprocessing/filter.py +181 -0
- recnexteval/preprocessing/preprocessor.py +137 -0
- recnexteval/registries/__init__.py +67 -0
- recnexteval/registries/algorithm.py +68 -0
- recnexteval/registries/base.py +131 -0
- recnexteval/registries/dataset.py +37 -0
- recnexteval/registries/metric.py +57 -0
- recnexteval/settings/__init__.py +127 -0
- recnexteval/settings/base.py +414 -0
- recnexteval/settings/exception.py +8 -0
- recnexteval/settings/leave_n_out_setting.py +48 -0
- recnexteval/settings/processor.py +115 -0
- recnexteval/settings/schema.py +11 -0
- recnexteval/settings/single_time_point_setting.py +111 -0
- recnexteval/settings/sliding_window_setting.py +153 -0
- recnexteval/settings/splitters/__init__.py +14 -0
- recnexteval/settings/splitters/base.py +57 -0
- recnexteval/settings/splitters/n_last.py +39 -0
- recnexteval/settings/splitters/n_last_timestamp.py +76 -0
- recnexteval/settings/splitters/timestamp.py +82 -0
- recnexteval/settings/util.py +0 -0
- recnexteval/utils/__init__.py +115 -0
- recnexteval/utils/json_to_csv_converter.py +128 -0
- recnexteval/utils/logging_tools.py +159 -0
- recnexteval/utils/path.py +155 -0
- recnexteval/utils/url_certificate_installer.py +54 -0
- recnexteval/utils/util.py +166 -0
- recnexteval/utils/uuid_util.py +7 -0
- recnexteval/utils/yaml_tool.py +65 -0
- recnexteval-0.1.0.dist-info/METADATA +85 -0
- recnexteval-0.1.0.dist-info/RECORD +110 -0
- recnexteval-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# RecPack, An Experimentation Toolkit for Top-N Recommendation
|
|
2
|
+
# Copyright (C) 2020 Froomle N.V.
|
|
3
|
+
# License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
|
|
4
|
+
# Author:
|
|
5
|
+
# Lien Michiels
|
|
6
|
+
# Robin Verachtert
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
from numpy.typing import ArrayLike
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DecayFunction:
    """Common interface of the decay functions in this module.

    Instances are callables that take an array of time distances.
    This base implementation only raises ``NotImplementedError``.
    """

    def __call__(self, time_distances: ArrayLike) -> ArrayLike:
        """Apply the decay to ``time_distances``.

        :param time_distances: Array of distances to be decayed.
        :type time_distances: ArrayLike
        :returns: Array of distances to which the decay has been applied.
        :rtype: ArrayLike
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ExponentialDecay(DecayFunction):
    """Applies exponential decay.

    For each value x in ``time_distances`` the decayed value is computed as

    .. math::

        f(x) = e^{-\\alpha * x}

    where :math:`\\alpha` is the decay parameter.

    :param decay: Exponential decay parameter, should be in the [0, 1] interval.
    :type decay: float
    :raises ValueError: If ``decay`` is outside the [0, 1] interval.
    """

    @classmethod
    def validate_decay(cls, decay: float) -> None:
        """Verify if the decay parameter is in the right range for this decay function.

        :param decay: Candidate decay parameter.
        :type decay: float
        :raises ValueError: If ``decay`` is not in [0, 1].
        """
        # Keep the negated chained comparison: it also rejects NaN.
        if not (0 <= decay <= 1):
            raise ValueError(f"Decay parameter = {decay} is not in the supported range: [0, 1].")

    def __init__(self, decay: float) -> None:
        self.validate_decay(decay)
        self.decay = decay

    def __call__(self, time_distances: ArrayLike) -> ArrayLike:
        """Apply the decay function.

        :param time_distances: Array of distances to be decayed.
        :type time_distances: ArrayLike
        :returns: The decayed time array.
        :rtype: ArrayLike
        """
        # asanyarray is a no-op for ndarrays (and subclasses) but lets plain
        # sequences through; multiplying a list by a float would raise TypeError.
        return np.exp(-self.decay * np.asanyarray(time_distances))
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class ConvexDecay(DecayFunction):
    """Applies a convex decay function.

    Every value x in ``time_distances`` is mapped to

    .. math::

        f(x) = \\alpha^{x}

    with :math:`\\alpha` the decay parameter.

    :param decay: The decay parameter, should be in the ]0, 1] interval.
    :type decay: float
    :raises ValueError: If ``decay`` is outside the ]0, 1] interval.
    """

    @classmethod
    def validate_decay(cls, decay: float):
        """Check that ``decay`` lies in the range supported by this decay function.

        :param decay: Candidate decay parameter.
        :type decay: float
        :raises ValueError: If ``decay`` is not in ]0, 1].
        """
        in_supported_range = 0 < decay <= 1
        if not in_supported_range:
            raise ValueError(f"Decay parameter = {decay} is not in the supported range: ]0, 1].")

    def __init__(self, decay: float):
        self.validate_decay(decay)
        self.decay = decay

    def __call__(self, time_distances: ArrayLike):
        """Raise the decay parameter to the power of every distance.

        :param time_distances: Array of distances to be decayed.
        :type time_distances: ArrayLike
        :returns: The decayed time array.
        :rtype: ArrayLike
        """
        base = self.decay
        return np.power(base, time_distances)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class ConcaveDecay(DecayFunction):
    """Applies a concave decay function.

    For each value x in the ``time_distances`` the decayed value is computed as

    .. math::

        f(x) = 1 - \\alpha^{1-\\frac{x}{N}}

    where :math:`\\alpha` is the decay parameter and :math:`N` is the ``max_distance`` parameter.

    :param decay: The decay parameter. The range enforced by ``validate_decay`` is ]0, 1].
    :type decay: float
    :param max_distance: Normalizing parameter, used to scale distances into [0, 1].
    :type max_distance: float
    :raises ValueError: If ``decay`` is outside the ]0, 1] interval.
    """

    # NOTE(review): this docstring previously advertised [0, 1[ while the check
    # below enforces ]0, 1]. With decay = 1 the formula evaluates to 0 for every
    # distance, so the intended range may be [0, 1[ -- confirm before changing
    # the validation, since callers may rely on the current behavior.
    @classmethod
    def validate_decay(cls, decay: float):
        """Verify if the decay parameter is in the right range for this decay function.

        :param decay: Candidate decay parameter.
        :type decay: float
        :raises ValueError: If ``decay`` is not in ]0, 1].
        """
        if not (0 < decay <= 1):
            raise ValueError(f"Decay parameter = {decay} is not in the supported range: ]0, 1].")

    def __init__(self, decay: float, max_distance: float):
        self.validate_decay(decay)
        self.decay = decay
        self.max_distance = max_distance

    def __call__(self, time_distances: ArrayLike):
        """Apply the decay function.

        :param time_distances: Array of distances to be decayed.
        :type time_distances: ArrayLike
        :returns: The decayed array.
        :rtype: ArrayLike
        :raises ValueError: If any distance exceeds ``max_distance``.
        """
        # Coerce first so lists and scalars work: comparing a list with a
        # number raises, and a plain bool has no ``.any()``.
        distances = np.asanyarray(time_distances)
        if np.any(distances > self.max_distance):
            raise ValueError(
                "At least one of the distances is bigger than the specified max_distance."
            )
        return 1 - np.power(self.decay, 1 - (distances / self.max_distance))
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class LogDecay(DecayFunction):
    """Applies a logarithmic decay function.

    For each value x in the ``time_distances`` the decayed value is computed as

    .. math::

        f(x) = log_\\alpha ((\\alpha-1)(1-\\frac{x}{N}) + 1)

    where :math:`\\alpha` is the decay parameter and :math:`N` is the ``max_distance`` parameter.

    :param decay: The decay parameter, should be in the range ]1, inf[
    :type decay: float
    :param max_distance: Normalizing parameter, used to scale distances into [0, 1].
    :type max_distance: float
    :raises ValueError: If ``decay`` is not strictly greater than 1.
    """

    @classmethod
    def validate_decay(cls, decay: float):
        """Verify if the decay parameter is in the right range for this decay function.

        :param decay: Candidate decay parameter.
        :type decay: float
        :raises ValueError: If ``decay`` is not in ]1, inf[.
        """
        if not (1 < decay):
            raise ValueError(f"Decay parameter = {decay} is not in the supported range: ]1, inf[.")

    def __init__(self, decay: float, max_distance: float):
        self.validate_decay(decay)
        self.decay = decay
        self.max_distance = max_distance

    def __call__(self, time_distances: ArrayLike):
        """Apply the decay function.

        :param time_distances: Array of distances to be decayed.
        :type time_distances: ArrayLike
        :returns: The decayed time array.
        :rtype: ArrayLike
        :raises ValueError: If any distance exceeds ``max_distance``.
        """
        # Coerce first so lists and scalars work: comparing a list with a
        # number raises, and a plain bool has no ``.any()``.
        distances = np.asanyarray(time_distances)
        if np.any(distances > self.max_distance):
            raise ValueError(
                "At least one of the distances is bigger than the specified max_distance."
            )
        # log_alpha(y) is computed through the change of base ln(y) / ln(alpha).
        argument = ((self.decay - 1) * (1 - distances / self.max_distance)) + 1
        return np.log(argument) / np.log(self.decay)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class LinearDecay(DecayFunction):
    """Applies a linear decay function.

    For each value x in the ``time_distances`` the decayed value is computed as

    .. math::

        f(x) = \\max(1 - (\\frac{x}{N}) \\alpha, 0)

    where :math:`\\alpha` is the decay parameter and :math:`N` is the ``max_distance`` parameter.

    :param decay: The decay parameter, should be in the [0, inf[ interval.
    :type decay: float
    :param max_distance: Normalizing parameter, used to scale distances into [0, 1].
    :type max_distance: float
    :raises ValueError: If ``decay`` is negative.
    """

    @classmethod
    def validate_decay(cls, decay: float):
        """Verify if the decay parameter is in the right range for this decay function.

        :param decay: Candidate decay parameter.
        :type decay: float
        :raises ValueError: If ``decay`` is not in [0, +inf[.
        """
        if not (0 <= decay):
            raise ValueError(f"Decay parameter = {decay} is not in the supported range: [0, +inf[.")

    def __init__(self, decay: float, max_distance: float):
        self.validate_decay(decay)
        self.decay = decay
        self.max_distance = max_distance

    def __call__(self, time_distances: ArrayLike):
        """Apply the decay function.

        :param time_distances: Array of distances to be decayed.
        :type time_distances: ArrayLike
        :returns: The decayed time array, clipped at 0 from below.
        :rtype: ArrayLike
        :raises ValueError: If any distance exceeds ``max_distance``.
        """
        # Coerce first so lists and scalars work: comparing a list with a
        # number raises, and a plain bool has no ``.any()``.
        distances = np.asanyarray(time_distances)
        if np.any(distances > self.max_distance):
            raise ValueError(
                "At least one of the distances is bigger than the specified max_distance."
            )
        results = 1 - (distances / self.max_distance) * self.decay
        # np.maximum clips negatives to 0 and, unlike boolean item assignment,
        # also works when the result is a scalar.
        return np.maximum(results, 0)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class InverseDecay(DecayFunction):
    """Invert the scores.

    This decay function takes no decay parameter.
    For each value x in the ``time_distances`` the decayed value is computed as

    .. math::

        f(x) = \\frac{1}{x}

    for positive x. A distance of exactly 0 is mapped to 1, and negative
    distances are returned unchanged.
    """

    def __call__(self, time_distances: ArrayLike):
        """Apply the decay function.

        :param time_distances: Array of distances to be decayed.
        :type time_distances: ArrayLike
        :returns: The decayed time array (a new float array; the input is not modified).
        :rtype: ArrayLike
        """
        # astype already returns a fresh copy, so no extra .copy() is needed.
        results = np.asanyarray(time_distances).astype(float)

        # Compute both masks BEFORE inverting: 1 / inf == 0, and evaluating the
        # zero mask afterwards would overwrite that 0 with 1.
        positive = results > 0
        zero = results == 0

        results[positive] = 1 / results[positive]
        results[zero] = 1
        return results
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
class NoDecay(ExponentialDecay):
    """Turns the array into a binary array.

    Implemented as an exponential decay whose decay parameter is 0,
    so every distance is mapped to :math:`e^{0} = 1`.
    """

    def __init__(self):
        super().__init__(decay=0)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# Adopted from RecPack, An Experimentation Toolkit for Top-N Recommendation
|
|
2
|
+
# Copyright (C) 2020 Froomle N.V.
|
|
3
|
+
# License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
|
|
4
|
+
# Author:
|
|
5
|
+
# Lien Michiels
|
|
6
|
+
# Robin Verachtert
|
|
7
|
+
|
|
8
|
+
from recnexteval.algorithms.time_aware_item_knn.base import TARSItemKNN
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TARSItemKNNDing(TARSItemKNN):
    """Time aware ItemKNN variant with exponential decay applied at prediction time.

    Algorithm as presented in
    Yi Ding and Xue Li. 2005.
    Time weight collaborative filtering.
    In Proceedings of the 14th ACM international conference on Information and knowledge management (CIKM '05).
    Association for Computing Machinery, New York, NY, USA, 485–492.
    https://doi.org/10.1145/1099554.1099689

    Computation of the similarity matrix is the same as normal ItemKNN
    (``fit_decay`` is fixed to 0 here).
    When predicting however the user's older interactions are given less weight in the final prediction score.

    .. math::

        \\text{sim}(u, i) = \\sum\\limits_{j \\in X_u} e^{-\\alpha \\cdot \\delta t_{u,j}} \\cdot \\text{sim}(i, j)

    Where :math:`\\alpha` is the `predict_decay` parameter.

    :param K: How many neighbours to use per item,
        make sure to pick a value below the number of columns of the matrix to fit on.
        Defaults to 200
    :type K: int, optional
    :param pad_with_popularity: Whether to pad the similarity matrix with RecentPop Algorithm.
        Defaults to True.
    :type pad_with_popularity: bool, optional
    :param predict_decay: Defines the decay scaling used for decay during prediction.
        Defaults to 1 / (24 * 3600).
        This means for every day since an interaction, the value of it will be divided by 'e'.
    :type predict_decay: float, optional
    :param similarity: Which similarity measure to use. Defaults to `"cosine"`.
        ``["cosine", "conditional_probability"]`` are supported.
    :type similarity: str, optional

    This code is adapted from RecPack :cite:`recpack`
    """

    SUPPORTED_SIMILARITIES = ["cosine", "conditional_probability"]

    def __init__(
        self,
        K: int = 200,
        pad_with_popularity: bool = True,
        predict_decay: float = 1 / (24 * 3600),
        similarity: str = "cosine",
    ):
        # No decay while fitting; only the prediction step is time weighted.
        super().__init__(
            K=K,
            pad_with_popularity=pad_with_popularity,
            fit_decay=0,
            predict_decay=predict_decay,
            similarity=similarity,
            decay_function="exponential",
        )
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Adopted from RecPack, An Experimentation Toolkit for Top-N Recommendation
|
|
2
|
+
# Copyright (C) 2020 Froomle N.V.
|
|
3
|
+
# License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
|
|
4
|
+
# Author:
|
|
5
|
+
# Lien Michiels
|
|
6
|
+
# Robin Verachtert
|
|
7
|
+
|
|
8
|
+
"""Module with time-dependent ItemKNN implementations"""
|
|
9
|
+
|
|
10
|
+
from recnexteval.algorithms.time_aware_item_knn.base import TARSItemKNN
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TARSItemKNNLiu(TARSItemKNN):
    """Time aware ItemKNN variant combining exponential decay with cosine similarity.

    Algorithm as described in
    Nathan N. Liu, Min Zhao, Evan Xiang, and Qiang Yang.
    2010. Online evolutionary collaborative filtering.
    In Proceedings of the fourth ACM conference on Recommender systems (RecSys '10).
    Association for Computing Machinery, New York, NY, USA, 95–102.
    https://doi.org/10.1145/1864708.1864729

    The algorithm uses an exponential decay function:

    .. math::

        \\Gamma(x) = e^{- \\alpha \\cdot \\text{x}}

    where :math:`\\alpha` is the decay scaling parameter,
    and x is the time between the maximal timestamp in the matrix
    and the timestamp of the event.

    Similarity is computed on this weighted matrix, using cosine similarity.
    At prediction time a user's history is weighted using the same formula with a different alpha.
    This weighted history is then multiplied with the precomputed similarity matrix.

    :param K: How many neighbours to use per item,
        make sure to pick a value below the number of columns of the matrix to fit on.
        Defaults to 200
    :type K: int, optional
    :param pad_with_popularity: Whether to pad the similarity matrix with RecentPop Algorithm.
        Defaults to True.
    :type pad_with_popularity: bool, optional
    :param fit_decay: Defines the decay scaling used for decay during model fitting.
        Defaults to 1 / (24 * 3600).
        This means for every day since an interaction, the value of it will be divided by 'e'.
    :type fit_decay: float, optional
    :param predict_decay: Defines the decay scaling used for decay during prediction.
        Defaults to 1 / (24 * 3600).
        This means for every day since an interaction, the value of it will be divided by 'e'.
    :type predict_decay: float, optional

    This code is adapted from RecPack :cite:`recpack`
    """

    def __init__(
        self,
        K: int = 200,
        pad_with_popularity: bool = True,
        fit_decay: float = 1 / (24 * 3600),
        predict_decay: float = 1 / (24 * 3600),
    ):
        # Similarity measure and decay function are fixed for this variant.
        super().__init__(
            K=K,
            pad_with_popularity=pad_with_popularity,
            fit_decay=fit_decay,
            predict_decay=predict_decay,
            similarity="cosine",
            decay_function="exponential",
        )
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# RecPack, An Experimentation Toolkit for Top-N Recommendation
|
|
2
|
+
# Copyright (C) 2020 Froomle N.V.
|
|
3
|
+
# License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
|
|
4
|
+
# Author:
|
|
5
|
+
# Lien Michiels
|
|
6
|
+
# Robin Verachtert
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
from scipy.sparse import csr_matrix, diags
|
|
10
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
11
|
+
|
|
12
|
+
from recnexteval.utils import invert, to_binary
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def compute_conditional_probability(X: csr_matrix, pop_discount: float = 0) -> csr_matrix:
    """Compute conditional probability like similarity.

    Computation using equation (3) from the original ItemKNN paper.
    'Item-based top-n recommendation algorithms.'
    Deshpande, Mukund, and George Karypis

    .. math ::
        sim(i,j) = \\frac{\\sum\\limits_{u \\in U} \\mathbb{I}_{u,i} X_{u,j}}{Freq(i) \\times Freq(j)^{\\alpha}}

    Where :math:`\\mathbb{I}_{ui}` is 1 if the user u has visited item i, and 0 otherwise.
    And alpha is the pop_discount parameter.
    Note that this is a non-symmetric similarity measure.
    Given that X is a binary matrix, and alpha is set to 0,
    this simplifies to pure conditional probability.

    .. math::
        sim(i,j) = \\frac{Freq(i \\land j)}{Freq(i)}

    :param X: user x item matrix with scores per user, item pair.
    :type X: csr_matrix
    :param pop_discount: Parameter defining popularity discount. Defaults to 0
    :type pop_discount: float, optional
    :return: item x item similarity matrix with the diagonal set to 0.
    :rtype: csr_matrix
    """
    # Binarize once; both the co-occurrence matrix and the item frequencies use it.
    X_binary = to_binary(X)

    # matrix with co_mat_i,j = SUM(1_u,i * X_u,j for each user u)
    # If the input matrix is binary, this is the cooccurence count matrix.
    co_mat = X_binary.T @ X

    # Diagonal matrix holding the inverse of the item frequencies.
    item_frequencies = X_binary.sum(axis=0).A[0]
    A = invert(diags(item_frequencies).tocsr())

    if pop_discount:
        # Co_mat is weighted by both the frequency of item i
        # and the frequency of item j to the pop_discount power.
        # If pop_discount = 1 (and X is binary), the similarity is symmetric again.
        item_cond_prob_similarities = A @ co_mat @ A.power(pop_discount)
    else:
        # Weight the co_mat with the amount of occurences of item i only.
        item_cond_prob_similarities = A @ co_mat

    # Set diagonal to 0, because we don't support self similarity
    item_cond_prob_similarities.setdiag(0)

    return item_cond_prob_similarities
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def compute_cosine_similarity(X: csr_matrix) -> csr_matrix:
    """Compute pairwise cosine similarity between the items (columns) of ``X``.

    The diagonal of the result is set to 0, so self similarity is removed.

    :param X: user x item matrix with scores per user, item pair.
    :type X: csr_matrix
    :return: similarity matrix
    :rtype: csr_matrix
    """
    # Items have to be the rows of the input; passing X untransposed
    # would compute a user KNN instead.
    items_as_rows = X.T
    item_cosine_similarities = cosine_similarity(items_as_rows, dense_output=False)

    # Set diagonal to 0, because we don't want to support self similarity
    item_cosine_similarities.setdiag(0)

    return item_cosine_similarities
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def compute_pearson_similarity(X: csr_matrix) -> csr_matrix:
    """Compute the pearson correlation as a similarity between each item in the matrix.

    Self similarity is removed.
    Each item (column) is centered by subtracting its mean from its positive
    entries, where the mean is the column sum divided by the number of
    positive entries in that column. Cosine similarity on the centered
    matrix is then returned.

    :param X: Rating or pseudo rating matrix.
    :type X: csr_matrix
    :return: similarity matrix.
    :rtype: csr_matrix
    :raises ValueError: If every stored value of ``X`` equals 1 (binary matrix).
    """
    stored_ones = (X == 1).sum()
    if stored_ones == X.nnz:
        raise ValueError("Pearson similarity can not be computed on a binary matrix.")

    positive_entries = X > 0

    # Both are (1, n_items) dense arrays.
    count_per_item = positive_entries.sum(axis=0).A
    avg_per_item = X.sum(axis=0).A.astype(float)

    # Only divide where an item has at least one positive entry.
    has_entries = count_per_item > 0
    avg_per_item[has_entries] = avg_per_item[has_entries] / count_per_item[has_entries]

    # Subtract the item mean at the positions of positive entries only.
    centered = X - positive_entries.multiply(avg_per_item)

    # Given the rescaled matrix, the pearson correlation is just cosine similarity on this matrix.
    return compute_cosine_similarity(centered)
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# RecPack, An Experimentation Toolkit for Top-N Recommendation
|
|
2
|
+
# Copyright (C) 2020 Froomle N.V.
|
|
3
|
+
# License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
|
|
4
|
+
# Author:
|
|
5
|
+
# Lien Michiels
|
|
6
|
+
# Robin Verachtert
|
|
7
|
+
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy.sparse import csr_matrix
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_top_K_ranks(X: csr_matrix, K: Optional[int] = None) -> csr_matrix:
    """Returns a matrix of ranks assigned to the largest K values in X.

    Selects the K largest stored values for every row in X and assigns a
    rank to each: rank 1 for the largest, rank 2 for the next, and so on.
    Rows with fewer than K stored values get a rank for every stored value.

    :param X: Matrix from which we will select K values in every row.
    :type X: csr_matrix
    :param K: Amount of values to select. ``None`` ranks every stored value.
    :type K: int, optional
    :return: Matrix with at most K ranks per row, same shape as ``X``.
    :rtype: csr_matrix
    :raises ValueError: If ``K`` is negative.
    """
    # A negative K would flow into the slices below and silently produce
    # meaningless ranks, so reject it up front.
    if K is not None and K < 0:
        raise ValueError(f"K must be non-negative, got {K}.")

    U, I, V = [], [], []
    for row_ix, (le, ri) in enumerate(zip(X.indptr[:-1], X.indptr[1:])):
        n_stored = int(ri - le)
        K_row_pick = n_stored if K is None else min(K, n_stored)

        if K_row_pick == 0:
            continue

        # Passing every index in [-K, -1] as kth leaves the last K positions
        # of the partition fully sorted in ascending order.
        kth = list(range(-K_row_pick, 0))
        order = np.argpartition(X.data[le:ri], kth)[-K_row_pick:]
        top_k_row = X.indices[le + order]

        # Reverse so that the largest value comes first and receives rank 1.
        U.extend([row_ix] * K_row_pick)
        I.extend(top_k_row[::-1])
        V.extend(range(1, K_row_pick + 1))

    X_top_K = csr_matrix((V, (U, I)), shape=X.shape)

    return X_top_K
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_top_K_values(X: csr_matrix, K: Optional[int] = None) -> csr_matrix:
    """Returns a matrix of only the K largest values for every row in X.

    Selects the top-K items for every user (which is equal to the K nearest neighbours.)
    NOTE(review): tie-breaking for the last position is decided by the
    partitioning in ``get_top_K_ranks`` -- confirm before relying on a
    specific order among tied values.

    :param X: Matrix from which we will select K values in every row.
    :type X: csr_matrix
    :param K: Amount of values to select.
    :type K: int, optional
    :return: Matrix with K values per row.
    :rtype: csr_matrix
    """
    selection_mask = get_top_K_ranks(X, K)

    # Every stored rank is >= 1, so overwriting the stored data with 1
    # turns the rank matrix into a binary selection mask.
    selection_mask.data[:] = 1

    # Elementwise product keeps the values of X at the selected positions only.
    return selection_mask.multiply(X)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# RecPack, An Experimentation Toolkit for Top-N Recommendation
|
|
2
|
+
# Copyright (C) 2020 Froomle N.V.
|
|
3
|
+
# License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
|
|
4
|
+
# Author:
|
|
5
|
+
# Lien Michiels
|
|
6
|
+
# Robin Verachtert
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
from scipy.sparse import csr_matrix
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
EPSILON = 1e-13
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def invert(x: np.ndarray | csr_matrix) -> np.ndarray | csr_matrix:
|
|
20
|
+
"""Invert an array.
|
|
21
|
+
|
|
22
|
+
:param x: [description]
|
|
23
|
+
:type x: [type]
|
|
24
|
+
:return: [description]
|
|
25
|
+
:rtype: [type]
|
|
26
|
+
"""
|
|
27
|
+
if isinstance(x, np.ndarray):
|
|
28
|
+
ret = np.zeros(x.shape)
|
|
29
|
+
elif isinstance(x, csr_matrix):
|
|
30
|
+
ret = csr_matrix(x.shape)
|
|
31
|
+
else:
|
|
32
|
+
raise TypeError("Unsupported type for argument x.")
|
|
33
|
+
ret[x.nonzero()] = 1 / x[x.nonzero()]
|
|
34
|
+
return ret
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def to_binary(X: csr_matrix) -> csr_matrix:
    """Return a binary version of ``X`` in which every non-zero value becomes 1.

    The result keeps the dtype of ``X``; ``X`` itself is left untouched.

    :param X: Matrix to convert to binary.
    :type X: csr_matrix
    :return: Binary matrix.
    :rtype: csr_matrix
    """
    original_dtype = X.dtype
    # Casting to bool collapses every non-zero value to True ...
    nonzero_mask = X.astype(bool)
    # ... and casting back restores the dtype with True represented as 1.
    return nonzero_mask.astype(original_dtype)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Adopted from RecPack, An Experimentation Toolkit for Top-N Recommendation
|
|
2
|
+
# Copyright (C) 2020 Froomle N.V.
|
|
3
|
+
# License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
|
|
4
|
+
# Author:
|
|
5
|
+
# Lien Michiels
|
|
6
|
+
# Robin Verachtert
|
|
7
|
+
|
|
8
|
+
from recnexteval.algorithms.time_aware_item_knn.base import TARSItemKNN
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TARSItemKNNVaz(TARSItemKNN):
    """Time aware variant of ItemKNN which uses an exponential decay function and pearson similarity.

    Algorithm as described in
    Understanding the Temporal Dynamics of Recommendations across Different Rating Scales
    Paula Cristina Vaz, Ricardo Ribeiro, David Martins de Matos.
    Late-Breaking Results, Project Papers and Workshop Proceedings of the 21st Conference
    on User Modeling, Adaptation, and Personalization. Rome, Italy, June 10-14, 2013.

    The algorithm uses an exponential decay function:

    .. math::

        \\Gamma(x) = e^{- \\alpha \\cdot \\text{x}}

    where :math:`\\alpha` is the decay scaling parameter,
    and x is the time between the maximal timestamp in the matrix
    and the timestamp of the event.

    :param K: How many neighbours to use per item,
        make sure to pick a value below the number of columns of the matrix to fit on.
        Defaults to 200
    :type K: int, optional
    :param pad_with_popularity: Whether to pad the similarity matrix with RecentPop Algorithm.
        Defaults to True.
    :type pad_with_popularity: bool, optional
    :param fit_decay: Defines the decay scaling used for decay during model fitting.
        Defaults to 1/(24*3600).
        This means for every day since an interaction, the value of it will be divided by 'e'.
    :type fit_decay: float, optional
    :param predict_decay: Defines the decay scaling used for decay during prediction.
        Defaults to 1/(24*3600).
        This means for every day since an interaction, the value of it will be divided by 'e'.
    :type predict_decay: float, optional

    This code is adapted from RecPack :cite:`recpack`
    """

    def __init__(
        self,
        K: int = 200,
        pad_with_popularity: bool = True,
        fit_decay: float = 1 / (24 * 3600),
        predict_decay: float = 1 / (24 * 3600),
    ):
        # K is passed by keyword, like in the sibling TARSItemKNN subclasses,
        # so the call does not depend on the parameter order of the base class.
        super().__init__(
            K=K,
            pad_with_popularity=pad_with_popularity,
            fit_decay=fit_decay,
            predict_decay=predict_decay,
            similarity="pearson",
            decay_function="exponential",
        )
|