recnexteval 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recnexteval/__init__.py +20 -0
- recnexteval/algorithms/__init__.py +99 -0
- recnexteval/algorithms/base.py +377 -0
- recnexteval/algorithms/baseline/__init__.py +10 -0
- recnexteval/algorithms/baseline/decay_popularity.py +110 -0
- recnexteval/algorithms/baseline/most_popular.py +72 -0
- recnexteval/algorithms/baseline/random.py +39 -0
- recnexteval/algorithms/baseline/recent_popularity.py +34 -0
- recnexteval/algorithms/itemknn/__init__.py +14 -0
- recnexteval/algorithms/itemknn/itemknn.py +119 -0
- recnexteval/algorithms/itemknn/itemknn_incremental.py +65 -0
- recnexteval/algorithms/itemknn/itemknn_incremental_movielens.py +95 -0
- recnexteval/algorithms/itemknn/itemknn_rolling.py +17 -0
- recnexteval/algorithms/itemknn/itemknn_static.py +31 -0
- recnexteval/algorithms/time_aware_item_knn/__init__.py +11 -0
- recnexteval/algorithms/time_aware_item_knn/base.py +248 -0
- recnexteval/algorithms/time_aware_item_knn/decay_functions.py +260 -0
- recnexteval/algorithms/time_aware_item_knn/ding_2005.py +52 -0
- recnexteval/algorithms/time_aware_item_knn/liu_2010.py +65 -0
- recnexteval/algorithms/time_aware_item_knn/similarity_functions.py +106 -0
- recnexteval/algorithms/time_aware_item_knn/top_k.py +61 -0
- recnexteval/algorithms/time_aware_item_knn/utils.py +47 -0
- recnexteval/algorithms/time_aware_item_knn/vaz_2013.py +50 -0
- recnexteval/algorithms/utils.py +51 -0
- recnexteval/datasets/__init__.py +109 -0
- recnexteval/datasets/base.py +316 -0
- recnexteval/datasets/config/__init__.py +113 -0
- recnexteval/datasets/config/amazon.py +188 -0
- recnexteval/datasets/config/base.py +72 -0
- recnexteval/datasets/config/lastfm.py +105 -0
- recnexteval/datasets/config/movielens.py +169 -0
- recnexteval/datasets/config/yelp.py +25 -0
- recnexteval/datasets/datasets/__init__.py +24 -0
- recnexteval/datasets/datasets/amazon.py +151 -0
- recnexteval/datasets/datasets/base.py +250 -0
- recnexteval/datasets/datasets/lastfm.py +121 -0
- recnexteval/datasets/datasets/movielens.py +93 -0
- recnexteval/datasets/datasets/test.py +46 -0
- recnexteval/datasets/datasets/yelp.py +103 -0
- recnexteval/datasets/metadata/__init__.py +58 -0
- recnexteval/datasets/metadata/amazon.py +68 -0
- recnexteval/datasets/metadata/base.py +38 -0
- recnexteval/datasets/metadata/lastfm.py +110 -0
- recnexteval/datasets/metadata/movielens.py +87 -0
- recnexteval/evaluators/__init__.py +189 -0
- recnexteval/evaluators/accumulator.py +167 -0
- recnexteval/evaluators/base.py +216 -0
- recnexteval/evaluators/builder/__init__.py +125 -0
- recnexteval/evaluators/builder/base.py +166 -0
- recnexteval/evaluators/builder/pipeline.py +111 -0
- recnexteval/evaluators/builder/stream.py +54 -0
- recnexteval/evaluators/evaluator_pipeline.py +287 -0
- recnexteval/evaluators/evaluator_stream.py +374 -0
- recnexteval/evaluators/state_management.py +310 -0
- recnexteval/evaluators/strategy.py +32 -0
- recnexteval/evaluators/util.py +124 -0
- recnexteval/matrix/__init__.py +48 -0
- recnexteval/matrix/exception.py +5 -0
- recnexteval/matrix/interaction_matrix.py +784 -0
- recnexteval/matrix/prediction_matrix.py +153 -0
- recnexteval/matrix/util.py +24 -0
- recnexteval/metrics/__init__.py +57 -0
- recnexteval/metrics/binary/__init__.py +4 -0
- recnexteval/metrics/binary/hit.py +49 -0
- recnexteval/metrics/core/__init__.py +10 -0
- recnexteval/metrics/core/base.py +126 -0
- recnexteval/metrics/core/elementwise_top_k.py +75 -0
- recnexteval/metrics/core/listwise_top_k.py +72 -0
- recnexteval/metrics/core/top_k.py +60 -0
- recnexteval/metrics/core/util.py +29 -0
- recnexteval/metrics/ranking/__init__.py +6 -0
- recnexteval/metrics/ranking/dcg.py +55 -0
- recnexteval/metrics/ranking/ndcg.py +78 -0
- recnexteval/metrics/ranking/precision.py +51 -0
- recnexteval/metrics/ranking/recall.py +42 -0
- recnexteval/models/__init__.py +4 -0
- recnexteval/models/base.py +69 -0
- recnexteval/preprocessing/__init__.py +37 -0
- recnexteval/preprocessing/filter.py +181 -0
- recnexteval/preprocessing/preprocessor.py +137 -0
- recnexteval/registries/__init__.py +67 -0
- recnexteval/registries/algorithm.py +68 -0
- recnexteval/registries/base.py +131 -0
- recnexteval/registries/dataset.py +37 -0
- recnexteval/registries/metric.py +57 -0
- recnexteval/settings/__init__.py +127 -0
- recnexteval/settings/base.py +414 -0
- recnexteval/settings/exception.py +8 -0
- recnexteval/settings/leave_n_out_setting.py +48 -0
- recnexteval/settings/processor.py +115 -0
- recnexteval/settings/schema.py +11 -0
- recnexteval/settings/single_time_point_setting.py +111 -0
- recnexteval/settings/sliding_window_setting.py +153 -0
- recnexteval/settings/splitters/__init__.py +14 -0
- recnexteval/settings/splitters/base.py +57 -0
- recnexteval/settings/splitters/n_last.py +39 -0
- recnexteval/settings/splitters/n_last_timestamp.py +76 -0
- recnexteval/settings/splitters/timestamp.py +82 -0
- recnexteval/settings/util.py +0 -0
- recnexteval/utils/__init__.py +115 -0
- recnexteval/utils/json_to_csv_converter.py +128 -0
- recnexteval/utils/logging_tools.py +159 -0
- recnexteval/utils/path.py +155 -0
- recnexteval/utils/url_certificate_installer.py +54 -0
- recnexteval/utils/util.py +166 -0
- recnexteval/utils/uuid_util.py +7 -0
- recnexteval/utils/yaml_tool.py +65 -0
- recnexteval-0.1.0.dist-info/METADATA +85 -0
- recnexteval-0.1.0.dist-info/RECORD +110 -0
- recnexteval-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,784 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import operator
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from copy import deepcopy
|
|
5
|
+
from enum import StrEnum
|
|
6
|
+
from typing import Literal, overload
|
|
7
|
+
from warnings import warn
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from scipy.sparse import csr_matrix
|
|
12
|
+
|
|
13
|
+
from .exception import TimestampAttributeMissingError
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ItemUserBasedEnum(StrEnum):
|
|
20
|
+
"""Enum class for item and user based properties.
|
|
21
|
+
|
|
22
|
+
Enum class to indicate if the function or logic is based on item or user.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
ITEM = "item"
|
|
26
|
+
"""Property based on item"""
|
|
27
|
+
USER = "user"
|
|
28
|
+
"""Property based on user"""
|
|
29
|
+
|
|
30
|
+
@classmethod
|
|
31
|
+
def has_value(cls, value: str) -> bool:
|
|
32
|
+
"""Check valid value for ItemUserBasedEnum
|
|
33
|
+
|
|
34
|
+
:param value: String value input
|
|
35
|
+
:type value: str
|
|
36
|
+
"""
|
|
37
|
+
return value in ItemUserBasedEnum
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class InteractionMatrix:
|
|
41
|
+
"""Matrix of interaction data between users and items.
|
|
42
|
+
|
|
43
|
+
It provides a number of properties and methods for easy manipulation of this interaction data.
|
|
44
|
+
|
|
45
|
+
.. attention::
|
|
46
|
+
|
|
47
|
+
- The InteractionMatrix does not assume binary user-item pairs.
|
|
48
|
+
If a user interacts with an item more than once, there will be two
|
|
49
|
+
entries for this user-item pair.
|
|
50
|
+
|
|
51
|
+
- We assume that the user and item IDs are integers starting from 0. IDs
|
|
52
|
+
that are indicated by "-1" are reserved to label the user or item to
|
|
53
|
+
be predicted. This assumption is crucial as it will be used during the
|
|
54
|
+
split scheme and evaluation of the RS since it will affect the 2D shape
|
|
55
|
+
of the CSR matrix
|
|
56
|
+
|
|
57
|
+
:param df: Dataframe containing user-item interactions. Must contain at least
|
|
58
|
+
item ids and user ids.
|
|
59
|
+
:type df: pd.DataFrame
|
|
60
|
+
:param item_ix: Item ids column name.
|
|
61
|
+
:type item_ix: str
|
|
62
|
+
:param user_ix: User ids column name.
|
|
63
|
+
:type user_ix: str
|
|
64
|
+
:param timestamp_ix: Interaction timestamps column name.
|
|
65
|
+
:type timestamp_ix: str
|
|
66
|
+
:param shape: The desired shape of the matrix, i.e. the number of users and items.
|
|
67
|
+
If no shape is specified, the number of users will be equal to the
|
|
68
|
+
maximum user id plus one, the number of items to the maximum item
|
|
69
|
+
id plus one.
|
|
70
|
+
:type shape: tuple[int, int], optional
|
|
71
|
+
:param skip_df_processing: Skip processing of the dataframe. This is useful
|
|
72
|
+
when the dataframe is already processed and the columns are already
|
|
73
|
+
renamed.
|
|
74
|
+
:type skip_df_processing: bool, optional
|
|
75
|
+
"""
|
|
76
|
+
|
|
77
|
+
ITEM_IX = "iid"
|
|
78
|
+
USER_IX = "uid"
|
|
79
|
+
TIMESTAMP_IX = "ts"
|
|
80
|
+
INTERACTION_IX = "interactionid"
|
|
81
|
+
MASKED_LABEL = -1
|
|
82
|
+
|
|
83
|
+
def __init__(
|
|
84
|
+
self,
|
|
85
|
+
df: pd.DataFrame,
|
|
86
|
+
item_ix: str,
|
|
87
|
+
user_ix: str,
|
|
88
|
+
timestamp_ix: str,
|
|
89
|
+
shape: None | tuple[int, int] = None,
|
|
90
|
+
skip_df_processing: bool = False,
|
|
91
|
+
) -> None:
|
|
92
|
+
self.user_item_shape: tuple[int, int]
|
|
93
|
+
"""The shape of the interaction matrix, i.e. `|user| x |item|`."""
|
|
94
|
+
if shape:
|
|
95
|
+
self.user_item_shape = shape
|
|
96
|
+
|
|
97
|
+
if skip_df_processing:
|
|
98
|
+
self._df = df
|
|
99
|
+
return
|
|
100
|
+
|
|
101
|
+
col_mapper = {
|
|
102
|
+
item_ix: InteractionMatrix.ITEM_IX,
|
|
103
|
+
user_ix: InteractionMatrix.USER_IX,
|
|
104
|
+
timestamp_ix: InteractionMatrix.TIMESTAMP_IX,
|
|
105
|
+
}
|
|
106
|
+
df = df.rename(columns=col_mapper)
|
|
107
|
+
required_columns = [
|
|
108
|
+
InteractionMatrix.USER_IX,
|
|
109
|
+
InteractionMatrix.ITEM_IX,
|
|
110
|
+
InteractionMatrix.TIMESTAMP_IX,
|
|
111
|
+
]
|
|
112
|
+
extra_columns = [col for col in df.columns if col not in required_columns]
|
|
113
|
+
df = df[required_columns + extra_columns].copy()
|
|
114
|
+
# TODO refactor this statement below
|
|
115
|
+
df = df.reset_index(drop=True).reset_index().rename(columns={"index": InteractionMatrix.INTERACTION_IX})
|
|
116
|
+
|
|
117
|
+
self._df = df
|
|
118
|
+
|
|
119
|
+
def copy(self) -> "InteractionMatrix":
|
|
120
|
+
"""Create a deep copy of this InteractionMatrix.
|
|
121
|
+
|
|
122
|
+
:return: Deep copy of this InteractionMatrix.
|
|
123
|
+
:rtype: InteractionMatrix
|
|
124
|
+
"""
|
|
125
|
+
return deepcopy(self)
|
|
126
|
+
|
|
127
|
+
def copy_df(self, reset_index: bool = False) -> "pd.DataFrame":
|
|
128
|
+
"""Create a deep copy of the dataframe.
|
|
129
|
+
|
|
130
|
+
:return: Deep copy of dataframe.
|
|
131
|
+
:rtype: pd.DataFrame
|
|
132
|
+
"""
|
|
133
|
+
if reset_index:
|
|
134
|
+
return deepcopy(self._df.reset_index(drop=True))
|
|
135
|
+
return deepcopy(self._df)
|
|
136
|
+
|
|
137
|
+
def concat(self, im: "InteractionMatrix | pd.DataFrame") -> "InteractionMatrix":
|
|
138
|
+
"""Concatenate this InteractionMatrix with another.
|
|
139
|
+
|
|
140
|
+
.. note::
|
|
141
|
+
This is a inplace operation. and will modify the current object.
|
|
142
|
+
|
|
143
|
+
:param im: InteractionMatrix to concat with.
|
|
144
|
+
:type im: Union[InteractionMatrix, pd.DataFrame]
|
|
145
|
+
:return: InteractionMatrix with the interactions from both matrices.
|
|
146
|
+
:rtype: InteractionMatrix
|
|
147
|
+
"""
|
|
148
|
+
if isinstance(im, pd.DataFrame):
|
|
149
|
+
self._df = pd.concat([self._df, im])
|
|
150
|
+
else:
|
|
151
|
+
self._df = pd.concat([self._df, im._df])
|
|
152
|
+
|
|
153
|
+
return self
|
|
154
|
+
|
|
155
|
+
# TODO this should be shifted to prediction matrix
|
|
156
|
+
def union(self, im: "InteractionMatrix") -> "InteractionMatrix":
|
|
157
|
+
"""Combine events from this InteractionMatrix with another.
|
|
158
|
+
|
|
159
|
+
:param im: InteractionMatrix to union with.
|
|
160
|
+
:type im: InteractionMatrix
|
|
161
|
+
:return: Union of interactions in this InteractionMatrix and the other.
|
|
162
|
+
:rtype: InteractionMatrix
|
|
163
|
+
"""
|
|
164
|
+
return self + im
|
|
165
|
+
|
|
166
|
+
def difference(self, im: "InteractionMatrix") -> "InteractionMatrix":
|
|
167
|
+
"""Difference between this InteractionMatrix and another.
|
|
168
|
+
|
|
169
|
+
:param im: InteractionMatrix to subtract from this.
|
|
170
|
+
:type im: InteractionMatrix
|
|
171
|
+
:return: Difference between this InteractionMatrix and the other.
|
|
172
|
+
:rtype: InteractionMatrix
|
|
173
|
+
"""
|
|
174
|
+
return self - im
|
|
175
|
+
|
|
176
|
+
@property
|
|
177
|
+
def values(self) -> csr_matrix:
|
|
178
|
+
"""All user-item interactions as a sparse matrix of size (|`global_users`|, |`global_items`|).
|
|
179
|
+
|
|
180
|
+
Each entry is the number of interactions between that user and item.
|
|
181
|
+
If there are no interactions between a user and item, the entry is 0.
|
|
182
|
+
|
|
183
|
+
:return: Interactions between users and items as a csr_matrix.
|
|
184
|
+
:rtype: csr_matrix
|
|
185
|
+
"""
|
|
186
|
+
# TODO issue with -1 labeling in the interaction matrix should i create prediction matrix
|
|
187
|
+
if not hasattr(self, "user_item_shape"):
|
|
188
|
+
raise AttributeError("InteractionMatrix has no `user_item_shape` attribute. Please call mask_shape() first.")
|
|
189
|
+
|
|
190
|
+
values = np.ones(self._df.shape[0])
|
|
191
|
+
indices = self._df[[InteractionMatrix.USER_IX, InteractionMatrix.ITEM_IX]].values
|
|
192
|
+
indices = (indices[:, 0], indices[:, 1])
|
|
193
|
+
|
|
194
|
+
matrix = csr_matrix((values, indices), shape=self.user_item_shape, dtype=np.int32)
|
|
195
|
+
return matrix
|
|
196
|
+
|
|
197
|
+
@property
|
|
198
|
+
def indices(self) -> tuple[list[int], list[int]]:
|
|
199
|
+
"""Returns a tuple of lists of user IDs and item IDs corresponding to interactions.
|
|
200
|
+
|
|
201
|
+
:return: tuple of lists of user IDs and item IDs that correspond to at least one interaction.
|
|
202
|
+
:rtype: tuple[list[int], list[int]]
|
|
203
|
+
"""
|
|
204
|
+
return self.values.nonzero()
|
|
205
|
+
|
|
206
|
+
def nonzero(self) -> tuple[list[int], list[int]]:
|
|
207
|
+
return self.values.nonzero()
|
|
208
|
+
|
|
209
|
+
@overload
def users_in(self, U: set[int]) -> "InteractionMatrix": ...
@overload
def users_in(self, U: set[int], inplace: Literal[False]) -> "InteractionMatrix": ...
@overload
def users_in(self, U: set[int], inplace: Literal[True]) -> None: ...
def users_in(self, U: set[int], inplace: bool = False) -> "None | InteractionMatrix":
    """Keep only the interactions made by one of the given users.

    :param U: A set of user ids whose interactions are kept.
    :type U: set[int]
    :param inplace: Apply the selection in place or not, defaults to False
    :type inplace: bool, optional
    :return: None if `inplace`, otherwise returns a new InteractionMatrix object
    :rtype: Union[InteractionMatrix, None]
    """
    logger.debug("Performing users_in comparison")

    user_column = self._df[InteractionMatrix.USER_IX]
    return self._apply_mask(user_column.isin(U), inplace=inplace)
|
|
230
|
+
|
|
231
|
+
@overload
|
|
232
|
+
def _apply_mask(self, mask: pd.Series) -> "InteractionMatrix": ...
|
|
233
|
+
@overload
|
|
234
|
+
def _apply_mask(self, mask: pd.Series, inplace: Literal[True]) -> None: ...
|
|
235
|
+
@overload
|
|
236
|
+
def _apply_mask(self, mask: pd.Series, inplace: Literal[False]) -> "InteractionMatrix": ...
|
|
237
|
+
def _apply_mask(self, mask: pd.Series, inplace: bool = False) -> "None | InteractionMatrix":
|
|
238
|
+
interaction_m = self if inplace else self.copy()
|
|
239
|
+
interaction_m._df = interaction_m._df[mask]
|
|
240
|
+
return None if inplace else interaction_m
|
|
241
|
+
|
|
242
|
+
def _timestamps_cmp(self, op: Callable, timestamp: float, inplace: bool = False) -> "None | InteractionMatrix":
    """Filter interactions by comparing their timestamps with a reference.

    A row is kept when ``op(row_timestamp, timestamp)`` is True; ``op`` is
    applied to the whole timestamp column at once.

    :param op: Comparison operator. Its ``__name__`` is used in the debug log.
    :type op: Callable
    :param timestamp: Timestamp to compare against in seconds from epoch.
    :type timestamp: float
    :param inplace: Modify the data matrix in place. If False, returns a new object.
    :type inplace: bool, optional
    :return: None if `inplace`, otherwise returns a new InteractionMatrix object
    :rtype: Union[InteractionMatrix, None]
    """
    logger.debug(f"Performing {op.__name__}(t, {timestamp})")

    ts_column = self._df[InteractionMatrix.TIMESTAMP_IX]
    keep = op(ts_column, timestamp)
    return self._apply_mask(keep, inplace=inplace)
|
|
257
|
+
|
|
258
|
+
@overload
def timestamps_gt(self, timestamp: float) -> "InteractionMatrix": ...
@overload
def timestamps_gt(self, timestamp: float, inplace: Literal[True]) -> None: ...
def timestamps_gt(self, timestamp: float, inplace: bool = False) -> "None | InteractionMatrix":
    """Keep the interactions whose timestamp is strictly greater than ``timestamp``.

    :param timestamp: The timestamp with which
        the interactions timestamp is compared.
    :type timestamp: float
    :param inplace: Apply the selection in place if True, defaults to False
    :type inplace: bool, optional
    :return: None if `inplace`, otherwise returns a new InteractionMatrix object
    :rtype: Union[InteractionMatrix, None]
    """
    comparison = operator.gt
    return self._timestamps_cmp(comparison, timestamp, inplace=inplace)
|
|
274
|
+
|
|
275
|
+
@overload
def timestamps_gte(self, timestamp: float) -> "InteractionMatrix": ...
@overload
def timestamps_gte(self, timestamp: float, inplace: Literal[True]) -> None: ...
def timestamps_gte(self, timestamp: float, inplace: bool = False) -> "None | InteractionMatrix":
    """Keep the interactions whose timestamp is greater than or equal to ``timestamp``.

    :param timestamp: The timestamp with which
        the interactions timestamp is compared.
    :type timestamp: float
    :param inplace: Apply the selection in place if True, defaults to False
    :type inplace: bool, optional
    :return: None if `inplace`, otherwise returns a new InteractionMatrix object
    :rtype: Union[InteractionMatrix, None]
    """
    comparison = operator.ge
    return self._timestamps_cmp(comparison, timestamp, inplace=inplace)
|
|
291
|
+
|
|
292
|
+
@overload
def timestamps_lt(self, timestamp: float) -> "InteractionMatrix": ...
@overload
def timestamps_lt(self, timestamp: float, inplace: Literal[True]) -> None: ...
def timestamps_lt(self, timestamp: float, inplace: bool = False) -> "None | InteractionMatrix":
    """Keep the interactions whose timestamp is strictly less than ``timestamp``.

    :param timestamp: The timestamp with which
        the interactions timestamp is compared.
    :type timestamp: float
    :param inplace: Apply the selection in place if True, defaults to False
    :type inplace: bool, optional
    :return: None if `inplace`, otherwise returns a new InteractionMatrix object
    :rtype: Union[InteractionMatrix, None]
    """
    comparison = operator.lt
    return self._timestamps_cmp(comparison, timestamp, inplace=inplace)
|
|
308
|
+
|
|
309
|
+
@overload
def timestamps_lte(self, timestamp: float) -> "InteractionMatrix": ...
@overload
def timestamps_lte(self, timestamp: float, inplace: Literal[True]) -> None: ...
def timestamps_lte(self, timestamp: float, inplace: bool = False) -> "None | InteractionMatrix":
    """Keep the interactions whose timestamp is less than or equal to ``timestamp``.

    :param timestamp: The timestamp with which
        the interactions timestamp is compared.
    :type timestamp: float
    :param inplace: Apply the selection in place if True, defaults to False
    :type inplace: bool, optional
    :return: None if `inplace`, otherwise returns a new InteractionMatrix object
    :rtype: Union[InteractionMatrix, None]
    """
    comparison = operator.le
    return self._timestamps_cmp(comparison, timestamp, inplace=inplace)
|
|
325
|
+
|
|
326
|
+
def __add__(self, im: "InteractionMatrix") -> "InteractionMatrix":
|
|
327
|
+
"""Combine events from this InteractionMatrix with another.
|
|
328
|
+
|
|
329
|
+
:param im: InteractionMatrix to union with.
|
|
330
|
+
:type im: InteractionMatrix
|
|
331
|
+
:return: Union of interactions in this InteractionMatrix and the other.
|
|
332
|
+
:rtype: InteractionMatrix
|
|
333
|
+
"""
|
|
334
|
+
df = pd.concat([self._df, im._df], copy=False)
|
|
335
|
+
|
|
336
|
+
shape = None
|
|
337
|
+
if hasattr(self, "user_item_shape") and hasattr(im, "user_item_shape"):
|
|
338
|
+
shape = (max(self.user_item_shape[0], im.user_item_shape[0]), max(self.user_item_shape[1], im.user_item_shape[1]))
|
|
339
|
+
self.user_item_shape = shape
|
|
340
|
+
|
|
341
|
+
return InteractionMatrix(
|
|
342
|
+
df,
|
|
343
|
+
InteractionMatrix.ITEM_IX,
|
|
344
|
+
InteractionMatrix.USER_IX,
|
|
345
|
+
InteractionMatrix.TIMESTAMP_IX,
|
|
346
|
+
shape,
|
|
347
|
+
True,
|
|
348
|
+
)
|
|
349
|
+
|
|
350
|
+
def __sub__(self, im: "InteractionMatrix") -> "InteractionMatrix":
|
|
351
|
+
full_data = pd.MultiIndex.from_frame(self._df)
|
|
352
|
+
data_part_2 = pd.MultiIndex.from_frame(im._df)
|
|
353
|
+
data_part_1 = full_data.difference(data_part_2).to_frame().reset_index(drop=True)
|
|
354
|
+
|
|
355
|
+
shape = None
|
|
356
|
+
if hasattr(self, "user_item_shape") and hasattr(im, "user_item_shape"):
|
|
357
|
+
shape = (max(self.user_item_shape[0], im.user_item_shape[0]), max(self.user_item_shape[1], im.user_item_shape[1]))
|
|
358
|
+
|
|
359
|
+
return InteractionMatrix(
|
|
360
|
+
data_part_1,
|
|
361
|
+
InteractionMatrix.ITEM_IX,
|
|
362
|
+
InteractionMatrix.USER_IX,
|
|
363
|
+
InteractionMatrix.TIMESTAMP_IX,
|
|
364
|
+
shape,
|
|
365
|
+
True,
|
|
366
|
+
)
|
|
367
|
+
|
|
368
|
+
def __repr__(self) -> str:
|
|
369
|
+
return repr(self._df)
|
|
370
|
+
|
|
371
|
+
def __eq__(self, value: object) -> bool:
|
|
372
|
+
if not isinstance(value, InteractionMatrix):
|
|
373
|
+
logger.debug(f"Comparing {type(value)} with InteractionMatrix is not supported")
|
|
374
|
+
return False
|
|
375
|
+
return self._df.equals(value._df)
|
|
376
|
+
|
|
377
|
+
def __len__(self) -> int:
|
|
378
|
+
"""Return the number of interactions in the matrix.
|
|
379
|
+
|
|
380
|
+
This is distinct from the shape of the matrix, which is the number of
|
|
381
|
+
users and items that has been released to the model. The length of the
|
|
382
|
+
matrix is the number of interactions present in the matrix resulting
|
|
383
|
+
from filter operations.
|
|
384
|
+
"""
|
|
385
|
+
return len(self._df)
|
|
386
|
+
|
|
387
|
+
@overload
def items_in(self, id_set: set[int]) -> "InteractionMatrix": ...
@overload
def items_in(self, id_set: set[int], inplace: Literal[False]) -> "InteractionMatrix": ...
@overload
def items_in(self, id_set: set[int], inplace: Literal[True]) -> None: ...
def items_in(self, id_set: set[int], inplace=False) -> "None | InteractionMatrix":
    """Keep only the interactions that involve one of the given items.

    :param id_set: A set of item ids whose interactions are kept.
    :type id_set: set[int]
    :param inplace: Apply the selection in place or not, defaults to False
    :type inplace: bool, optional
    :return: None if `inplace`, otherwise returns a new InteractionMatrix object
    :rtype: Union[InteractionMatrix, None]
    """
    logger.debug("Performing items_in comparison")

    item_column = self._df[InteractionMatrix.ITEM_IX]
    return self._apply_mask(item_column.isin(id_set), inplace=inplace)
|
|
408
|
+
|
|
409
|
+
@overload
def items_not_in(self, id_set: set[int]) -> "InteractionMatrix": ...
@overload
def items_not_in(self, id_set: set[int], inplace: Literal[False]) -> "InteractionMatrix": ...
@overload
def items_not_in(self, id_set: set[int], inplace: Literal[True]) -> None: ...
def items_not_in(self, id_set: set[int], inplace: bool=False) -> "None | InteractionMatrix":
    """Drop the interactions that involve one of the given items.

    :param id_set: A set of item ids to exclude from the interactions.
    :type id_set: set[int]
    :param inplace: Apply the selection in place or not, defaults to False
    :type inplace: bool, optional
    :return: None if `inplace`, otherwise returns a new InteractionMatrix object
    :rtype: Union[InteractionMatrix, None]
    """
    logger.debug("Performing items_not_in comparison")

    excluded = self._df[InteractionMatrix.ITEM_IX].isin(id_set)
    return self._apply_mask(~excluded, inplace=inplace)
|
|
430
|
+
|
|
431
|
+
@overload
def users_not_in(self, U: set[int]) -> "InteractionMatrix": ...
@overload
def users_not_in(self, U: set[int], inplace: Literal[False]) -> "InteractionMatrix": ...
@overload
def users_not_in(self, U: set[int], inplace: Literal[True]) -> None: ...
def users_not_in(self, U: set[int], inplace: bool = False) -> "None | InteractionMatrix":
    """Keep only interactions not by the specified users.

    The overloads mirror those of ``users_in``, ``items_in`` and
    ``items_not_in`` so that type checkers can narrow the return type from
    the ``inplace`` argument.

    :param U: A set of user ids to exclude from the interactions.
    :type U: set[int]
    :param inplace: Apply the selection in place or not, defaults to False
    :type inplace: bool, optional
    :return: None if `inplace`, otherwise returns a new InteractionMatrix object
    :rtype: Union[InteractionMatrix, None]
    """
    logger.debug("Performing users_not_in comparison")

    mask = ~self._df[InteractionMatrix.USER_IX].isin(U)

    return self._apply_mask(mask, inplace=inplace)
|
|
446
|
+
|
|
447
|
+
def interactions_in(self, interaction_ids: list[int], inplace: bool = False) -> "None | InteractionMatrix":
    """Keep only the interactions with the given interaction ids.

    A warning is issued for requested ids that do not occur in the data,
    and another one when ``interaction_ids`` is empty.

    :param interaction_ids: A list of interaction ids
    :type interaction_ids: list[int]
    :param inplace: Apply the selection in place,
        or return a new InteractionMatrix object, defaults to False
    :type inplace: bool, optional
    :return: None if inplace, otherwise new InteractionMatrix
        object with the selected interactions
    :rtype: Union[None, InteractionMatrix]
    """
    logger.debug("Performing interactions_in comparison")

    id_column = self._df[InteractionMatrix.INTERACTION_IX]

    missing_ids = set(interaction_ids).difference(id_column.unique())
    if missing_ids:
        warn(f"IDs {missing_ids} not present in data")
    if not interaction_ids:
        warn("No interaction IDs given, returning empty InteractionMatrix.")

    selected = id_column.isin(interaction_ids)
    return self._apply_mask(selected, inplace=inplace)
|
|
471
|
+
|
|
472
|
+
def _get_last_n_interactions(
    self,
    by: ItemUserBasedEnum,
    n_seq_data: int,
    t_upper: None | int = None,
    id_in: None | set[int] = None,
    inplace=False,
) -> "InteractionMatrix":
    """Select the last ``n_seq_data`` rows per user or per item before a time limit.

    Rows with a timestamp strictly below the limit are grouped by user or
    item id and the trailing ``n_seq_data`` rows of each group are kept.
    "Last" refers to the row order of the dataframe; no sorting is done here.

    :param by: Group by user when equal to ``ItemUserBasedEnum.USER``,
        otherwise by item.
    :type by: ItemUserBasedEnum
    :param n_seq_data: Number of rows to keep per group.
    :type n_seq_data: int
    :param t_upper: Exclusive upper timestamp limit. Defaults to
        ``self.max_timestamp + 1`` so the latest timestamp is included.
    :type t_upper: None | int, optional
    :param id_in: Restrict to these user/item ids. An empty or None value
        applies no restriction.
    :type id_in: None | set[int], optional
    :param inplace: Filter this object instead of a deep copy, defaults to False
    :type inplace: bool, optional
    :raises TimestampAttributeMissingError: If ``self.has_timestamps`` is falsy.
    :return: The filtered matrix (this object itself when ``inplace``).
    :rtype: InteractionMatrix
    """
    if not self.has_timestamps:
        raise TimestampAttributeMissingError()
    upper_bound = (self.max_timestamp + 1) if t_upper is None else t_upper  # +1 to include the last timestamp

    target = self if inplace else self.copy()
    frame = target._df

    by_user = by == ItemUserBasedEnum.USER
    by_item = by == ItemUserBasedEnum.ITEM
    # Any value other than USER is grouped by item.
    group_column = InteractionMatrix.USER_IX if by_user else InteractionMatrix.ITEM_IX

    selected = frame[InteractionMatrix.TIMESTAMP_IX] < upper_bound
    # The id restriction only applies for an explicit USER or ITEM value.
    if id_in and (by_user or by_item):
        selected = selected & frame[group_column].isin(id_in)

    target._df = frame[selected].groupby(group_column).tail(n_seq_data)
    return target
|
|
500
|
+
|
|
501
|
+
def _get_first_n_interactions(
    self, by: ItemUserBasedEnum, n_seq_data: int, t_lower: None | int = None, inplace=False
) -> "InteractionMatrix":
    """Select the first ``n_seq_data`` rows per user or per item from a time limit on.

    Rows with a timestamp at or above the limit are grouped by user or item
    id and the leading ``n_seq_data`` rows of each group are kept. "First"
    refers to the row order of the dataframe; no sorting is done here.

    :param by: Group by user when equal to ``ItemUserBasedEnum.USER``,
        otherwise by item.
    :type by: ItemUserBasedEnum
    :param n_seq_data: Number of rows to keep per group.
    :type n_seq_data: int
    :param t_lower: Inclusive lower timestamp limit. Defaults to
        ``self.min_timestamp``.
    :type t_lower: None | int, optional
    :param inplace: Filter this object instead of a deep copy, defaults to False
    :type inplace: bool, optional
    :raises TimestampAttributeMissingError: If ``self.has_timestamps`` is falsy.
    :return: The filtered matrix (this object itself when ``inplace``).
    :rtype: InteractionMatrix
    """
    if not self.has_timestamps:
        raise TimestampAttributeMissingError()
    lower_bound = self.min_timestamp if t_lower is None else t_lower

    target = self if inplace else self.copy()
    frame = target._df

    # Any value other than USER is grouped by item.
    if by == ItemUserBasedEnum.USER:
        group_column = InteractionMatrix.USER_IX
    else:
        group_column = InteractionMatrix.ITEM_IX

    selected = frame[InteractionMatrix.TIMESTAMP_IX] >= lower_bound
    target._df = frame[selected].groupby(group_column).head(n_seq_data)
    return target
|
|
518
|
+
|
|
519
|
+
def get_users_n_last_interaction(
    self,
    n_seq_data: int = 1,
    t_upper: None | int = None,
    user_in: None | set[int] = None,
    inplace: bool = False,
) -> "InteractionMatrix":
    """Select the last n interactions of every user.

    :param n_seq_data: Number of interactions to select, defaults to 1
    :type n_seq_data: int, optional
    :param t_upper: Exclusive upper limit for the timestamp of the
        interactions to select, defaults to None
    :type t_upper: None | int, optional
    :param user_in: set of user IDs to select the interactions from,
        defaults to None
    :type user_in: None | set[int], optional
    :param inplace: If operation is inplace, defaults to False
    :type inplace: bool, optional
    :return: Resulting interaction matrix
    :rtype: InteractionMatrix
    """
    logger.debug("Performing get_user_n_last_interaction comparison")
    return self._get_last_n_interactions(
        by=ItemUserBasedEnum.USER,
        n_seq_data=n_seq_data,
        t_upper=t_upper,
        id_in=user_in,
        inplace=inplace,
    )
|
|
543
|
+
|
|
544
|
+
def get_items_n_last_interaction(
    self,
    n_seq_data: int = 1,
    t_upper: None | int = None,
    item_in: None | set[int] = None,
    inplace: bool = False,
) -> "InteractionMatrix":
    """Select the last n interactions of every item.

    :param n_seq_data: Number of interactions to select, defaults to 1
    :type n_seq_data: int, optional
    :param t_upper: Exclusive upper limit for the timestamp of the
        interactions to select, defaults to None
    :type t_upper: None | int, optional
    :param item_in: set of item IDs to select the interactions from,
        defaults to None
    :type item_in: None | set[int], optional
    :param inplace: If operation is inplace, defaults to False
    :type inplace: bool, optional
    :return: Resulting interaction matrix
    :rtype: InteractionMatrix
    """
    logger.debug("Performing get_item_n_last_interaction comparison")
    return self._get_last_n_interactions(
        by=ItemUserBasedEnum.ITEM,
        n_seq_data=n_seq_data,
        t_upper=t_upper,
        id_in=item_in,
        inplace=inplace,
    )
|
|
568
|
+
|
|
569
|
+
def get_users_n_first_interaction(
    self, n_seq_data: int = 1, t_lower: None | int = None, inplace=False
) -> "InteractionMatrix":
    """Keep only the first ``n_seq_data`` interactions of every user.

    Thin wrapper that forwards its arguments to
    ``_get_first_n_interactions`` with ``ItemUserBasedEnum.USER`` as the
    grouping mode.

    :param n_seq_data: Number of interactions to select, defaults to 1
    :type n_seq_data: int, optional
    :param t_lower: Seconds past t. Lower limit for the timestamp
        of the interactions to select, defaults to None
    :type t_lower: None | int, optional
    :param inplace: If operation is inplace, defaults to False
    :type inplace: bool, optional
    :return: Resulting interaction matrix
    :rtype: InteractionMatrix
    """
    grouping = ItemUserBasedEnum.USER
    return self._get_first_n_interactions(grouping, n_seq_data, t_lower, inplace)
|
|
585
|
+
|
|
586
|
+
def get_items_n_first_interaction(
    self, n_seq_data: int = 1, t_lower: None | int = None, inplace=False
) -> "InteractionMatrix":
    """Keep only the first ``n_seq_data`` interactions of every item.

    Thin wrapper that forwards its arguments to
    ``_get_first_n_interactions`` with ``ItemUserBasedEnum.ITEM`` as the
    grouping mode.

    :param n_seq_data: Number of interactions to select, defaults to 1
    :type n_seq_data: int, optional
    :param t_lower: Seconds past t. Lower limit for the timestamp
        of the interactions to select, defaults to None
    :type t_lower: None | int, optional
    :param inplace: If operation is inplace, defaults to False
    :type inplace: bool, optional
    :return: Resulting interaction matrix
    :rtype: InteractionMatrix
    """
    grouping = ItemUserBasedEnum.ITEM
    return self._get_first_n_interactions(grouping, n_seq_data, t_lower, inplace)
|
|
602
|
+
|
|
603
|
+
@property
def item_interaction_sequence_matrix(self) -> csr_matrix:
    """Build a sparse (interaction x item) indicator matrix.

    Row ``k`` of the result corresponds to the ``k``-th row of the
    underlying dataframe, so the dataframe's row order is preserved. Each
    row holds a single 1 in the column given by that interaction's item ID.

    :return: Sparse matrix with one row per interaction and
        ``self.user_item_shape[1]`` columns.
    :rtype: csr_matrix
    """
    n_rows = self._df.shape[0]
    ones = np.ones(n_rows)
    # (row, column) coordinates: the row is the position in the dataframe,
    # the column is the item ID of that interaction.
    coordinates = (np.arange(n_rows), self._df[InteractionMatrix.ITEM_IX].to_numpy())
    return csr_matrix(
        (ones, coordinates),
        shape=(n_rows, self.user_item_shape[1]),
        dtype=ones.dtype,
    )
|
|
616
|
+
|
|
617
|
+
@property
def user_id_sequence_array(self) -> np.ndarray:
    """User IDs of all interactions, in dataframe row order.

    :return: Numpy array of user IDs.
    :rtype: np.ndarray
    """
    user_column = self._df[InteractionMatrix.USER_IX]
    return user_column.to_numpy()
|
|
625
|
+
|
|
626
|
+
def get_prediction_data(self) -> "InteractionMatrix":
    """Get the data to be predicted.

    Delegates to ``items_in`` with the single item ID ``-1``, the
    placeholder used for an item that is unknown.

    :return: InteractionMatrix with only the data to be predicted.
    :rtype: InteractionMatrix
    """
    placeholder_items = {-1}
    return self.items_in(placeholder_items)
|
|
633
|
+
|
|
634
|
+
def get_interaction_data(self) -> "InteractionMatrix":
    """Get the interactions whose user ID and item ID are both not ``-1``.

    User and item IDs that are not denoted by "-1" are the ones that are
    known to the model. A row is kept only when neither of its two IDs is
    the ``-1`` placeholder.

    :return: InteractionMatrix with only the known data.
    :rtype: InteractionMatrix
    """
    user_known = self._df[InteractionMatrix.USER_IX] != -1
    item_known = self._df[InteractionMatrix.ITEM_IX] != -1
    return self._apply_mask(user_known & item_known)
|
|
645
|
+
|
|
646
|
+
@property
def user_ids(self) -> set[int]:
    """The set of distinct, non-null user IDs in the matrix."""
    user_column = self._df[InteractionMatrix.USER_IX]
    distinct_users = user_column.dropna().unique()
    return set(distinct_users)
|
|
650
|
+
|
|
651
|
+
@property
def item_ids(self) -> set[int]:
    """The set of distinct, non-null item IDs in the matrix."""
    item_column = self._df[InteractionMatrix.ITEM_IX]
    distinct_items = item_column.dropna().unique()
    return set(distinct_items)
|
|
655
|
+
|
|
656
|
+
@property
def num_interactions(self) -> int:
    """Number of rows in the underlying interaction dataframe.

    :return: Total interaction count.
    :rtype: int
    """
    row_count, _ = self._df.shape
    return row_count
|
|
664
|
+
|
|
665
|
+
@property
def has_timestamps(self) -> bool:
    """Whether the underlying dataframe has a timestamp column.

    :return: True if timestamps information is available, False otherwise.
    :rtype: bool
    """
    column_labels = self._df.columns
    return self.TIMESTAMP_IX in column_labels
|
|
673
|
+
|
|
674
|
+
@property
def min_timestamp(self) -> int:
    """The earliest timestamp among the interactions.

    :raises TimestampAttributeMissingError: If the timestamp column is missing.
    :return: The earliest timestamp.
    :rtype: int
    """
    if self.has_timestamps:
        timestamp_column = self._df[self.TIMESTAMP_IX]
        return timestamp_column.min()
    raise TimestampAttributeMissingError()
|
|
684
|
+
|
|
685
|
+
@property
def max_timestamp(self) -> int:
    """The latest timestamp among the interactions.

    :raises TimestampAttributeMissingError: If the timestamp column is missing.
    :return: The latest timestamp.
    :rtype: int
    """
    if self.has_timestamps:
        timestamp_column = self._df[self.TIMESTAMP_IX]
        return timestamp_column.max()
    raise TimestampAttributeMissingError()
|
|
695
|
+
|
|
696
|
+
@property
def max_global_user_id(self) -> int:
    """Upper bound on the global user IDs in the interaction matrix.

    The difference between `max_global_user_id` and `max_user_id` is that
    `max_global_user_id` considers all user IDs present in the dataframe,
    including users that are only encountered during prediction time.

    The value is the larger of "highest user ID in the dataframe plus one"
    and ``self.user_item_shape[0]``. When the dataframe has no user IDs
    (for example, it is empty), ``self.user_item_shape[0]`` is returned,
    which is the same fallback `max_known_user_id` uses.

    :return: One past the highest user ID, but at least
        ``self.user_item_shape[0]``.
    :rtype: int
    """
    highest_seen = self._df[self.USER_IX].max()
    # ``max()`` of an empty column is NaN, and ``int(NaN)`` raises
    # ValueError, so handle that case before converting.
    if pd.isna(highest_seen):
        return self.user_item_shape[0]
    return max(int(highest_seen) + 1, self.user_item_shape[0])
|
|
705
|
+
|
|
706
|
+
@property
def max_global_item_id(self) -> int:
    """Upper bound on the global item IDs in the interaction matrix.

    Item counterpart of `max_global_user_id`: every item ID present in the
    dataframe is considered. The value is the larger of "highest item ID
    in the dataframe plus one" and ``self.user_item_shape[1]``. When the
    dataframe has no item IDs (for example, it is empty),
    ``self.user_item_shape[1]`` is returned, which is the same fallback
    `max_known_item_id` uses.

    :return: One past the highest item ID, but at least
        ``self.user_item_shape[1]``.
    :rtype: int
    """
    highest_seen = self._df[self.ITEM_IX].max()
    # ``max()`` of an empty column is NaN, and ``int(NaN)`` raises
    # ValueError, so handle that case before converting.
    if pd.isna(highest_seen):
        return self.user_item_shape[1]
    return max(int(highest_seen) + 1, self.user_item_shape[1])
|
|
709
|
+
|
|
710
|
+
@property
def max_known_user_id(self) -> int:
    """Upper bound on the known user IDs in the interaction matrix.

    Only rows in which no column equals ``-1`` are considered. The value
    is "highest user ID among those rows plus one", capped at
    ``self.user_item_shape[0]``. If no such row exists,
    ``self.user_item_shape[0]`` is returned.

    :return: One past the highest known user ID, at most
        ``self.user_item_shape[0]``.
    :rtype: int
    """
    no_placeholder = (self._df != -1).all(axis=1)
    fully_known_rows = self._df[no_placeholder]
    highest_known = fully_known_rows[InteractionMatrix.USER_IX].max()
    if not pd.isna(highest_known):
        return min(int(highest_known) + 1, self.user_item_shape[0])
    return self.user_item_shape[0]
|
|
717
|
+
|
|
718
|
+
@property
def max_known_item_id(self) -> int:
    """Upper bound on the known item IDs in the interaction matrix.

    Only rows in which no column equals ``-1`` are considered. The value
    is "highest item ID among those rows plus one", capped at
    ``self.user_item_shape[1]``. If no such row exists,
    ``self.user_item_shape[1]`` is returned.

    :return: One past the highest known item ID, at most
        ``self.user_item_shape[1]``.
    :rtype: int
    """
    no_placeholder = (self._df != -1).all(axis=1)
    fully_known_rows = self._df[no_placeholder]
    highest_known = fully_known_rows[InteractionMatrix.ITEM_IX].max()
    if not pd.isna(highest_known):
        return min(int(highest_known) + 1, self.user_item_shape[1])
    return self.user_item_shape[1]
|
|
725
|
+
|
|
726
|
+
@property
def max_user_id(self) -> int:
    """The highest known user ID in the interaction matrix.

    User IDs equal to ``-1`` are ignored. If no other user ID is present
    (including the case of an empty matrix), ``-1`` is returned.

    :return: The highest user ID, or -1 if there is none.
    :rtype: int
    """
    user_column = self._df[self.USER_IX]
    # Filter the column itself instead of NaN-masking the whole frame:
    # masking upcasts integer columns to float, which made this property
    # return a numpy float despite the ``int`` annotation.
    highest = user_column[user_column != -1].max()
    if pd.isna(highest):
        return -1
    return int(highest)
|
|
737
|
+
|
|
738
|
+
@property
def max_item_id(self) -> int:
    """The highest known item ID in the interaction matrix.

    Item IDs equal to ``-1`` are ignored. In the case of an empty matrix,
    the highest item ID is -1. This is consistent with the definition that
    -1 denotes the item that is unknown. It would be incorrect to use any
    other value, since 0 is a valid item ID.

    :return: The highest item ID, or -1 if there is none.
    :rtype: int
    """
    item_column = self._df[self.ITEM_IX]
    # Filter the column itself instead of NaN-masking the whole frame:
    # masking upcasts integer columns to float, which made this property
    # return a numpy float despite the ``int`` annotation.
    highest = item_column[item_column != -1].max()
    if pd.isna(highest):
        return -1
    return int(highest)
|
|
754
|
+
|
|
755
|
+
@property
def timestamps(self) -> pd.Series:
    """Timestamps of interactions as a pandas Series, indexed by user ID and item ID.

    The series has one entry per dataframe row, in row order. A
    (user ID, item ID) pair that occurs in several rows therefore occurs
    several times in the index.

    :raises TimestampAttributeMissingError: If timestamp column is missing.
    :return: Interactions with composite index on (user ID, item ID)
    :rtype: pd.Series
    """
    if not self.has_timestamps:
        raise TimestampAttributeMissingError()
    pair_index = pd.MultiIndex.from_frame(self._df[[self.USER_IX, self.ITEM_IX]])
    # Selecting with a list already yields a new single-column frame, so it
    # can be re-indexed without touching ``self._df``.
    timestamp_frame = self._df[[self.TIMESTAMP_IX]].set_index(pair_index)
    return timestamp_frame[self.TIMESTAMP_IX]
|
|
771
|
+
|
|
772
|
+
@property
def latest_interaction_timestamps_matrix(self) -> csr_matrix:
    """A csr matrix holding the last interaction timestamp of each user, item pair.

    Timestamps are grouped per (user, item) pair and only the largest one
    is kept, so a pair that interacted several times contributes a single
    entry. The matrix has shape ``self.user_item_shape``.

    :return: Sparse matrix of latest timestamps, rows indexed by user ID
        and columns by item ID.
    :rtype: csr_matrix
    """
    latest = self.timestamps.groupby(["uid", "iid"]).max().reset_index()
    row_ids = latest["uid"].values
    col_ids = latest["iid"].values
    latest_ts = latest["ts"].values
    return csr_matrix((latest_ts, (row_ids, col_ids)), shape=self.user_item_shape)
|