recnexteval-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recnexteval/__init__.py +20 -0
- recnexteval/algorithms/__init__.py +99 -0
- recnexteval/algorithms/base.py +377 -0
- recnexteval/algorithms/baseline/__init__.py +10 -0
- recnexteval/algorithms/baseline/decay_popularity.py +110 -0
- recnexteval/algorithms/baseline/most_popular.py +72 -0
- recnexteval/algorithms/baseline/random.py +39 -0
- recnexteval/algorithms/baseline/recent_popularity.py +34 -0
- recnexteval/algorithms/itemknn/__init__.py +14 -0
- recnexteval/algorithms/itemknn/itemknn.py +119 -0
- recnexteval/algorithms/itemknn/itemknn_incremental.py +65 -0
- recnexteval/algorithms/itemknn/itemknn_incremental_movielens.py +95 -0
- recnexteval/algorithms/itemknn/itemknn_rolling.py +17 -0
- recnexteval/algorithms/itemknn/itemknn_static.py +31 -0
- recnexteval/algorithms/time_aware_item_knn/__init__.py +11 -0
- recnexteval/algorithms/time_aware_item_knn/base.py +248 -0
- recnexteval/algorithms/time_aware_item_knn/decay_functions.py +260 -0
- recnexteval/algorithms/time_aware_item_knn/ding_2005.py +52 -0
- recnexteval/algorithms/time_aware_item_knn/liu_2010.py +65 -0
- recnexteval/algorithms/time_aware_item_knn/similarity_functions.py +106 -0
- recnexteval/algorithms/time_aware_item_knn/top_k.py +61 -0
- recnexteval/algorithms/time_aware_item_knn/utils.py +47 -0
- recnexteval/algorithms/time_aware_item_knn/vaz_2013.py +50 -0
- recnexteval/algorithms/utils.py +51 -0
- recnexteval/datasets/__init__.py +109 -0
- recnexteval/datasets/base.py +316 -0
- recnexteval/datasets/config/__init__.py +113 -0
- recnexteval/datasets/config/amazon.py +188 -0
- recnexteval/datasets/config/base.py +72 -0
- recnexteval/datasets/config/lastfm.py +105 -0
- recnexteval/datasets/config/movielens.py +169 -0
- recnexteval/datasets/config/yelp.py +25 -0
- recnexteval/datasets/datasets/__init__.py +24 -0
- recnexteval/datasets/datasets/amazon.py +151 -0
- recnexteval/datasets/datasets/base.py +250 -0
- recnexteval/datasets/datasets/lastfm.py +121 -0
- recnexteval/datasets/datasets/movielens.py +93 -0
- recnexteval/datasets/datasets/test.py +46 -0
- recnexteval/datasets/datasets/yelp.py +103 -0
- recnexteval/datasets/metadata/__init__.py +58 -0
- recnexteval/datasets/metadata/amazon.py +68 -0
- recnexteval/datasets/metadata/base.py +38 -0
- recnexteval/datasets/metadata/lastfm.py +110 -0
- recnexteval/datasets/metadata/movielens.py +87 -0
- recnexteval/evaluators/__init__.py +189 -0
- recnexteval/evaluators/accumulator.py +167 -0
- recnexteval/evaluators/base.py +216 -0
- recnexteval/evaluators/builder/__init__.py +125 -0
- recnexteval/evaluators/builder/base.py +166 -0
- recnexteval/evaluators/builder/pipeline.py +111 -0
- recnexteval/evaluators/builder/stream.py +54 -0
- recnexteval/evaluators/evaluator_pipeline.py +287 -0
- recnexteval/evaluators/evaluator_stream.py +374 -0
- recnexteval/evaluators/state_management.py +310 -0
- recnexteval/evaluators/strategy.py +32 -0
- recnexteval/evaluators/util.py +124 -0
- recnexteval/matrix/__init__.py +48 -0
- recnexteval/matrix/exception.py +5 -0
- recnexteval/matrix/interaction_matrix.py +784 -0
- recnexteval/matrix/prediction_matrix.py +153 -0
- recnexteval/matrix/util.py +24 -0
- recnexteval/metrics/__init__.py +57 -0
- recnexteval/metrics/binary/__init__.py +4 -0
- recnexteval/metrics/binary/hit.py +49 -0
- recnexteval/metrics/core/__init__.py +10 -0
- recnexteval/metrics/core/base.py +126 -0
- recnexteval/metrics/core/elementwise_top_k.py +75 -0
- recnexteval/metrics/core/listwise_top_k.py +72 -0
- recnexteval/metrics/core/top_k.py +60 -0
- recnexteval/metrics/core/util.py +29 -0
- recnexteval/metrics/ranking/__init__.py +6 -0
- recnexteval/metrics/ranking/dcg.py +55 -0
- recnexteval/metrics/ranking/ndcg.py +78 -0
- recnexteval/metrics/ranking/precision.py +51 -0
- recnexteval/metrics/ranking/recall.py +42 -0
- recnexteval/models/__init__.py +4 -0
- recnexteval/models/base.py +69 -0
- recnexteval/preprocessing/__init__.py +37 -0
- recnexteval/preprocessing/filter.py +181 -0
- recnexteval/preprocessing/preprocessor.py +137 -0
- recnexteval/registries/__init__.py +67 -0
- recnexteval/registries/algorithm.py +68 -0
- recnexteval/registries/base.py +131 -0
- recnexteval/registries/dataset.py +37 -0
- recnexteval/registries/metric.py +57 -0
- recnexteval/settings/__init__.py +127 -0
- recnexteval/settings/base.py +414 -0
- recnexteval/settings/exception.py +8 -0
- recnexteval/settings/leave_n_out_setting.py +48 -0
- recnexteval/settings/processor.py +115 -0
- recnexteval/settings/schema.py +11 -0
- recnexteval/settings/single_time_point_setting.py +111 -0
- recnexteval/settings/sliding_window_setting.py +153 -0
- recnexteval/settings/splitters/__init__.py +14 -0
- recnexteval/settings/splitters/base.py +57 -0
- recnexteval/settings/splitters/n_last.py +39 -0
- recnexteval/settings/splitters/n_last_timestamp.py +76 -0
- recnexteval/settings/splitters/timestamp.py +82 -0
- recnexteval/settings/util.py +0 -0
- recnexteval/utils/__init__.py +115 -0
- recnexteval/utils/json_to_csv_converter.py +128 -0
- recnexteval/utils/logging_tools.py +159 -0
- recnexteval/utils/path.py +155 -0
- recnexteval/utils/url_certificate_installer.py +54 -0
- recnexteval/utils/util.py +166 -0
- recnexteval/utils/uuid_util.py +7 -0
- recnexteval/utils/yaml_tool.py +65 -0
- recnexteval-0.1.0.dist-info/METADATA +85 -0
- recnexteval-0.1.0.dist-info/RECORD +110 -0
- recnexteval-0.1.0.dist-info/WHEEL +4 -0
recnexteval/matrix/prediction_matrix.py
@@ -0,0 +1,153 @@
+import logging
+from warnings import warn
+
+import numpy as np
+import pandas as pd
+
+from .interaction_matrix import InteractionMatrix
+
+
+logger = logging.getLogger(__name__)
+
+
+class PredictionMatrix(InteractionMatrix):
+    @classmethod
+    def from_interaction_matrix(cls, im: InteractionMatrix) -> "PredictionMatrix":
+        """Create a PredictionMatrix from an InteractionMatrix.
+
+        :param im: The InteractionMatrix to convert.
+        :type im: InteractionMatrix
+        :return: A new PredictionMatrix with the same data.
+        :rtype: PredictionMatrix
+        """
+        return cls(
+            df=im._df,
+            item_ix=im.ITEM_IX,
+            user_ix=im.USER_IX,
+            timestamp_ix=im.TIMESTAMP_IX,
+            shape=getattr(im, 'shape', None),
+            skip_df_processing=True,
+        )
+
+    def mask_user_item_shape(
+        self,
+        shape: None | tuple[int, int] = None,
+        drop_unknown_user: bool = False,
+        drop_unknown_item: bool = False,
+        inherit_max_id: bool = False,
+    ) -> None:
+        """Masks global user and item IDs.
+
+        Ensures that the matrix released to the models only contains data
+        that is intended to be released, which addresses the data leakage issue.
+        It is recommended that the programmer defines the shape of the matrix
+        such that the model only sees the data that is intended to be seen.
+
+        =======
+        Example
+        =======
+
+        Given the following case where the data is as follows::
+
+            > uid: [0, 1, 2, 3, 4, 5]
+            > iid: [0, 1, 2, 3, -1, -1]
+            > ts : [0, 1, 2, 3, 4, 6]
+
+        Users 4 and 5 are the users to be predicted. Assuming that users 4 and 5
+        are unknown users, that is, the model has never seen them before, the shape
+        of the matrix should be (4, 4). This should be defined when calling the
+        function via :param:`shape`.
+
+        If the shape is defined and it contains IDs of unknown users/items, a warning
+        will be raised if :attr:`drop_unknown` is set to False. If :attr:`drop_unknown`
+        is set to True, the unknown users/items will be dropped from the data. All
+        user/item IDs greater than `shape[0]` will be dropped. This follows from
+        the initial assumption that user/item IDs start from 0 as defined in
+        the dataset class.
+
+        Else, in the event that :param:`shape` is not defined, the shape will be
+        inferred from the data. The shape will be determined by the number of
+        unique users/items. In this case the shape will be (5, 4). Note that the
+        shape may not be as intended by the programmer if the data contains
+        unknown users/items or if the dataframe does not contain all historical
+        users/items.
+
+        :param shape: Shape of the known user and item base. This value is
+            usually set by the evaluator during the evaluation run. It can
+            also be set manually by the programmer if there is a need to
+            alter the known user/item base. Defaults to None
+        :type shape: Optional[tuple[int, int]], optional
+        :param drop_unknown_user: To drop unknown users in the dataset,
+            defaults to False
+        :type drop_unknown_user: bool, optional
+        :param drop_unknown_item: To drop unknown items in the dataset,
+            defaults to False
+        :type drop_unknown_item: bool, optional
+        :param inherit_max_id: To inherit the maximum user and item ID from the
+            given shape and the dataframe. This is useful when the shape is
+            defined and the dataframe contains unknown users/items. Defaults to False
+        :type inherit_max_id: bool, optional
+        """
+
+        if not shape:
+            # infer shape from the data
+            known_user = np.nan_to_num(self._df[self._df != -1][InteractionMatrix.USER_IX].max(), nan=-1)
+            known_item = np.nan_to_num(self._df[self._df != -1][InteractionMatrix.ITEM_IX].max(), nan=-1)
+            self.user_item_shape = (known_user, known_item)
+            logger.debug(f"(user x item) shape inferred is {self.user_item_shape}")
+            if known_user == -1 or known_item == -1:
+                warn(
+                    "One of the dimensions of the shape cannot be inferred from the data. "
+                    "Call mask_shape() with shape parameter.",
+                    stacklevel=2,
+                )
+            return
+
+        logger.debug(
+            f"(user x item) shape defined is {shape}. "
+            f"Shape of dataframe stored in matrix was {self._df.shape} before masking"
+        )
+        if drop_unknown_user:
+            logger.debug("Dropping unknown users from interaction matrix based on defined shape")
+            self._df = pd.DataFrame(self._df[self._df[InteractionMatrix.USER_IX] < shape[0]])
+        if drop_unknown_item:
+            logger.debug("Dropping unknown items from interaction matrix based on defined shape")
+            self._df = pd.DataFrame(self._df[self._df[InteractionMatrix.ITEM_IX] < shape[1]])
+        logger.debug(f"Shape of dataframe stored in matrix is now {self._df.shape} after masking")
+
+        if inherit_max_id:
+            # we are only concerned about the absolute maximum id in the data, regardless of whether it is unknown
+            known_user = int(self._df[InteractionMatrix.USER_IX].max())
+            known_item = int(self._df[InteractionMatrix.ITEM_IX].max())
+            # + 1 as id starts from 0
+            self.user_item_shape = (max(shape[0], known_user + 1), max(shape[1], known_item + 1))
+        else:
+            self.user_item_shape = shape
+        logger.debug(f"Final (user x item) shape defined is {self.user_item_shape}")
+        self._check_user_item_shape()
+
+    def _check_user_item_shape(self) -> None:
+        if not hasattr(self, "user_item_shape"):
+            raise AttributeError("InteractionMatrix has no `user_item_shape` attribute. Please call mask_shape() first.")
+        if self.user_item_shape[0] is None or self.user_item_shape[1] is None:
+            raise ValueError("Shape must be defined.")
+
+        valid_df = self._df[self._df != -1]
+        req_rows = valid_df[InteractionMatrix.USER_IX].max()
+        req_cols = np.nan_to_num(valid_df[InteractionMatrix.ITEM_IX].max(), nan=-1)
+
+        if self.user_item_shape[0] < req_rows or self.user_item_shape[1] < req_cols:
+            logger.warning(
+                "InteractionMatrix shape mismatch detected. "
+                "Current shape: %s. Required minimum: (%s, %s). "
+                "Data loss may occur.",
+                self.user_item_shape,
+                req_rows,
+                req_cols,
+            )
+            warn(
+                "Provided shape does not match known id; there are id that are out of bounds. "
+                "Call mask_shape(drop_unknown=True) to drop unknown users and items.",
+                category=UserWarning,
+                stacklevel=2,
+            )
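To make the masking behaviour above concrete, here is a hedged usage sketch based on the docstring example. The column names and the `InteractionMatrix` constructor arguments are assumptions inferred from `from_interaction_matrix` in this file, not confirmed by the diff; in normal use the evaluator sets the shape.

```python
# Illustrative sketch only; column names and constructor arguments are assumed.
import pandas as pd

from recnexteval.matrix.interaction_matrix import InteractionMatrix
from recnexteval.matrix.prediction_matrix import PredictionMatrix

df = pd.DataFrame({
    "uid": [0, 1, 2, 3, 4, 5],
    "iid": [0, 1, 2, 3, -1, -1],   # -1 marks the items still to be predicted
    "ts":  [0, 1, 2, 3, 4, 6],
})
im = InteractionMatrix(df=df, user_ix="uid", item_ix="iid", timestamp_ix="ts")
pm = PredictionMatrix.from_interaction_matrix(im)

# Users 4 and 5 are unknown to the model, so the known user/item base is (4, 4);
# dropping unknown users removes their rows before the matrix is released.
pm.mask_user_item_shape(shape=(4, 4), drop_unknown_user=True)
print(pm.user_item_shape)  # (4, 4)
```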
recnexteval/matrix/util.py
@@ -0,0 +1,24 @@
+from scipy.sparse import csr_matrix
+
+from recnexteval.matrix.interaction_matrix import InteractionMatrix
+from recnexteval.utils.util import to_binary
+
+
+def to_csr_matrix(X: InteractionMatrix | csr_matrix, binary: bool = False) -> csr_matrix:
+    """Convert a matrix-like object to a scipy csr_matrix.
+
+    :param X: Matrix-like object to convert.
+    :type X: InteractionMatrix | csr_matrix
+    :param binary: If True, ensure the matrix is binary by setting non-zero values to 1.
+    :type binary: bool, optional
+    :return: The matrix as a csr_matrix.
+    :rtype: csr_matrix
+    """
+
+    if isinstance(X, csr_matrix):
+        res = X
+    elif isinstance(X, InteractionMatrix):
+        res = X.values
+    else:
+        raise AttributeError("Not supported Matrix conversion")
+    return to_binary(res) if binary else res
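A short usage sketch for `to_csr_matrix`; it assumes only that `to_binary` maps non-zero entries to 1, as the docstring above states.

```python
import numpy as np
from scipy.sparse import csr_matrix

from recnexteval.matrix.util import to_csr_matrix

X = csr_matrix(np.array([[0, 3], [2, 0]]))

# A csr_matrix passes through unchanged, or is binarised when binary=True.
print(to_csr_matrix(X).toarray())               # [[0 3] [2 0]]
print(to_csr_matrix(X, binary=True).toarray())  # [[0 1] [1 0]]
```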
recnexteval/metrics/__init__.py
@@ -0,0 +1,57 @@
+"""Metrics module for evaluating recommender system performance.
+
+This module provides a collection of metrics for evaluating the performance of
+recommender systems in streaming environments. Metrics are implemented as classes
+that inherit from the `Metric` base class, allowing for easy extension and customization.
+
+## Available Metrics
+
+The following metrics are currently available:
+
+- `PrecisionK`: Precision at K
+- `RecallK`: Recall at K
+- `DCGK`: Discounted Cumulative Gain at K
+- `NDCGK`: Normalized Discounted Cumulative Gain at K
+- `HitK`: Hit Rate at K
+
+## Using Metrics
+
+To use a metric, simply instantiate the corresponding class and call the `evaluate` method
+with the predicted and ground truth rankings:
+
+```python
+from recnexteval.metrics import PrecisionK
+
+metric = PrecisionK(k=10)
+score = metric.evaluate(
+    predicted_ranking, ground_truth_ranking
+)
+```
+
+The `evaluate` method returns a single float value representing the metric score.
+
+## Extending the Framework
+
+To add custom metrics, inherit from the `Metric` base class and implement the `evaluate` method.
+Refer to the base class documentation for implementation details.
+
+## Related Modules
+
+- recnexteval.evaluators: Evaluator classes for running metrics over data streams
+"""
+
+from .binary import HitK
+from .core import ListwiseMetricK, Metric, MetricTopK
+from .ranking import DCGK, NDCGK, PrecisionK, RecallK
+
+
+__all__ = [
+    "Metric",
+    "PrecisionK",
+    "RecallK",
+    "DCGK",
+    "NDCGK",
+    "HitK",
+    "ListwiseMetricK",
+    "MetricTopK",
+]
recnexteval/metrics/binary/hit.py
@@ -0,0 +1,49 @@
+# Adapted from RecPack, An Experimentation Toolkit for Top-N Recommendation
+# Copyright (C) 2020 Froomle N.V.
+# License: GNU AGPLv3 - https://gitlab.com/recpack-maintainers/recpack/-/blob/master/LICENSE
+# Author:
+#   Lien Michiels
+#   Robin Verachtert
+
+import logging
+
+from scipy.sparse import csr_matrix, lil_matrix
+
+from ..core.elementwise_top_k import ElementwiseMetricK
+
+
+logger = logging.getLogger(__name__)
+
+
+class HitK(ElementwiseMetricK):
+    """Computes the number of hits in a list of Top-K recommendations.
+
+    A hit is counted when a recommended item in the top K for this user was interacted with.
+
+    Detailed :attr:`results` show which of the items in the list of Top-K recommended items
+    were hits and which were not.
+
+    :param K: Size of the recommendation list consisting of the Top-K item predictions.
+    :type K: int
+
+    This code is adapted from RecPack :cite:`recpack`
+    """
+
+    IS_BASE: bool = False
+
+    def _calculate(self, y_true: csr_matrix, y_pred: csr_matrix) -> None:
+        # log number of users and ground truth interactions
+        logger.debug(f"HitK compute started - {self.name}")
+        logger.debug(f"Number of users: {y_true.shape[0]}")
+        logger.debug(f"Number of ground truth interactions: {y_true.nnz}")
+
+        scores = lil_matrix(y_pred.shape)
+
+        # Elementwise multiplication of top-K predictions and true interactions
+        scores[y_pred.multiply(y_true).astype(bool)] = 1
+
+        scores = scores.tocsr()
+        binary_score = (scores.sum(axis=1) >= 1).astype(int)
+        self._scores = csr_matrix(binary_score)
+
+        logger.debug(f"HitK compute complete - {self.name}")
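Below is a hedged, standalone sketch of how `HitK` computes hits, using the constructor arguments defined on `MetricTopK`/`Metric` later in this diff. It assumes `y_pred` is already restricted to the top-K items; in normal use the evaluator supplies these matrices and parameters.

```python
import numpy as np
from scipy.sparse import csr_matrix

from recnexteval.metrics import HitK

# Two users, three items; y_pred is assumed to already contain only top-K items.
y_true = csr_matrix(np.array([[0, 1, 0],
                              [1, 0, 0]]))
y_pred = csr_matrix(np.array([[0, 1, 1],    # user 0: item 1 is a hit
                              [0, 0, 1]]))  # user 1: no hit

metric = HitK(user_id_sequence_array=np.array([0, 1]), user_item_shape=(2, 3), K=2)
metric.calculate(y_true, y_pred)

# Fraction of users with at least one hit in their top-K list.
print(metric.macro_result)  # 0.5
```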
recnexteval/metrics/core/base.py
@@ -0,0 +1,126 @@
+import logging
+from abc import abstractmethod
+from warnings import warn
+
+import numpy as np
+from scipy.sparse import csr_matrix
+
+from ...algorithms.utils import get_top_K_ranks
+from ...models import BaseModel, ParamMixin
+
+
+logger = logging.getLogger(__name__)
+
+
+class Metric(BaseModel, ParamMixin):
+    """Base class for all metrics.
+
+    A Metric object is stateful, i.e. after `calculate`
+    the results can be retrieved in one of two ways:
+
+    - Detailed results are stored in :attr:`results`,
+    - Aggregated result value can be retrieved using :attr:`value`
+    """
+
+    _scores: None | csr_matrix
+    _user_id_map: np.ndarray
+    _y_true: csr_matrix
+    _y_pred: csr_matrix
+    _user_id_sequence_array: np.ndarray
+    """Sequence of user IDs in the evaluation data."""
+    _num_users: int
+    _true_positive: int
+    """Number of true positives computed. Used for caching to obtain macro results."""
+    _false_negative: int
+    """Number of false negatives computed. Used for caching to obtain macro results."""
+    _false_positive: int
+    """Number of false positives computed. Used for caching to obtain macro results."""
+
+    def __init__(
+        self,
+        user_id_sequence_array: np.ndarray,
+        user_item_shape: tuple[int, int],
+        timestamp_limit: None | int = None,
+    ) -> None:
+        self._user_id_sequence_array = user_id_sequence_array
+        self._num_users, self._num_items = user_item_shape
+        self._timestamp_limit: None | int = timestamp_limit
+
+    @property
+    def _is_computed(self) -> bool:
+        """Whether the metric has been computed."""
+        return hasattr(self, "_scores")
+
+    def get_params(self) -> dict[str, int | None]:
+        """Get the parameters of the metric."""
+        if not self.is_time_aware:
+            return {}
+        return {"timestamp_limit": self._timestamp_limit}
+
+    @property
+    def micro_result(self) -> dict[str, np.ndarray]:
+        """Micro results for the metric.
+
+        :return: Detailed results for the metric.
+        :rtype: dict[str, np.ndarray]
+        """
+        return {"score": np.array(self.macro_result)}
+
+    @property
+    def macro_result(self) -> None | float:
+        """The global metric value."""
+        raise NotImplementedError()
+
+    @property
+    def is_time_aware(self) -> bool:
+        """Whether the metric is time-aware."""
+        return self._timestamp_limit is not None
+
+    @property
+    def timestamp_limit(self) -> int:
+        """The timestamp limit for the metric."""
+        if not self.is_time_aware or self._timestamp_limit is None:
+            raise ValueError("This metric is not time-aware.")
+        return self._timestamp_limit
+
+    @property
+    def num_items(self) -> int:
+        """Dimension of the item-space in both `y_true` and `y_pred`."""
+        return self._num_items
+
+    @property
+    def num_users(self) -> int:
+        """Dimension of the user-space in both `y_true` and `y_pred`
+        after elimination of users without interactions in `y_true`.
+        """
+        return self._num_users
+
+    def _prepare_matrix(
+        self, y_true: csr_matrix, y_pred: csr_matrix
+    ) -> tuple[csr_matrix, csr_matrix]:
+        """Prepare the matrices for the metric calculation.
+
+        This method eliminates empty users and sets the shape of the matrices.
+        """
+        if not y_true.shape == y_pred.shape:
+            raise AssertionError(
+                f"Shape mismatch between y_true: {y_true.shape} and y_pred: {y_pred.shape}"
+            )
+        self._set_shape(y_true=y_true)
+        return y_true, y_pred
+
+    @abstractmethod
+    def _calculate(self, y_true: csr_matrix, y_pred: csr_matrix) -> None:
+        raise NotImplementedError()
+
+    def calculate(self, y_true: csr_matrix, y_pred: csr_matrix) -> None:
+        """Calculates this metric for all nonzero users in `y_true`,
+        given true labels and predicted scores.
+        """
+        y_true, y_pred = self._prepare_matrix(y_true, y_pred)
+        self._calculate(y_true, y_pred)
+
+    def _set_shape(self, y_true: csr_matrix) -> None:
+        """Set the number of users and items based on the shape of y_true."""
+        self._num_users, self._num_items = y_true.shape  # type: ignore
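As a sketch of the extension point described in the metrics module docstring, the hypothetical metric below implements `_calculate` and `macro_result` on top of the `Metric` base class shown above. The class name, its aggregation rule, and standalone instantiation are illustrative assumptions, not part of the package.

```python
import numpy as np
from scipy.sparse import csr_matrix

from recnexteval.metrics.core.base import Metric


class OverlapRate(Metric):  # hypothetical example, not shipped with recnexteval
    """Fraction of predicted user-item pairs that also appear in the ground truth."""

    def _calculate(self, y_true: csr_matrix, y_pred: csr_matrix) -> None:
        # Mark predicted pairs that are also true interactions.
        self._scores = csr_matrix(y_pred.astype(bool).multiply(y_true.astype(bool)))
        self._n_predicted = y_pred.nnz  # cached for the aggregate below

    @property
    def macro_result(self) -> None | float:
        if not self._is_computed:
            raise ValueError("Metric has not been calculated yet.")
        return self._scores.nnz / self._n_predicted if self._n_predicted else 0.0


metric = OverlapRate(user_id_sequence_array=np.array([0, 1]), user_item_shape=(2, 2))
metric.calculate(
    y_true=csr_matrix(np.array([[1, 0], [0, 1]])),
    y_pred=csr_matrix(np.array([[1, 1], [0, 1]])),
)
print(metric.macro_result)  # 2 hits out of 3 predictions -> 0.666...
```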
recnexteval/metrics/core/elementwise_top_k.py
@@ -0,0 +1,75 @@
+import logging
+from warnings import warn
+
+import numpy as np
+
+from .top_k import MetricTopK
+
+
+logger = logging.getLogger(__name__)
+
+
+class ElementwiseMetricK(MetricTopK):
+    """Base class for all elementwise metrics that can be calculated for
+    each user-item pair in the Top-K recommendations.
+
+    :attr:`results` contains an entry for each user-item pair.
+
+    Examples are: HitK
+
+    This code is adapted from RecPack :cite:`recpack`
+
+    :param K: Size of the recommendation list consisting of the Top-K item predictions.
+    :type K: int
+    """
+
+    # TODO: fix this function
+    @property
+    def micro_result(self) -> dict[str, np.ndarray]:
+        if not self._is_computed:
+            raise ValueError("Metric has not been calculated yet.")
+        elif self._scores is None:
+            warn(UserWarning("No scores were computed. Returning empty dict."))
+            return dict(zip(self.col_names, (np.array([]), np.array([]))))
+
+        scores = self._scores.toarray().reshape(-1)
+        unique_users, inv = np.unique(self._user_id_sequence_array, return_inverse=True)
+
+        # Sum hits per user
+        sum_ones = np.zeros(len(unique_users))
+        np.add.at(sum_ones, inv, scores)
+
+        # Count recommendations per user
+        count_all = np.zeros(len(unique_users))
+        np.add.at(count_all, inv, 1)
+
+        # Aggregated score per user
+        agg_score = sum_ones / count_all
+
+        return dict(zip(self.col_names, (unique_users, agg_score)))
+
+    @property
+    def macro_result(self) -> None | float:
+        if not self._is_computed:
+            raise ValueError("Metric has not been calculated yet.")
+        elif self._scores is None:
+            logger.warning(UserWarning("No scores were computed. Returning Null value."))
+            return None
+        elif self._scores.size == 0:
+            logger.warning(
+                UserWarning(
+                    f"All predictions were off or the ground truth matrix was empty during compute of {self.identifier}."
+                )
+            )
+            return 0
+
+        scores = self._scores.toarray().reshape(-1)
+        unique_users, inv = np.unique(self._user_id_sequence_array, return_inverse=True)
+        # Get all users that were recommended at least one relevant item
+        sum_ones = np.zeros(len(unique_users))
+        np.add.at(sum_ones, inv, scores)
+        # Convert to binary: 1 if at least 1 hit, 0 otherwise
+        binary_hits = (sum_ones > 0).astype(int)
+        # Fraction of users with at least 1 hit
+        return binary_hits.mean().item()
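The per-user aggregation in `micro_result` above relies on `np.unique(..., return_inverse=True)` plus `np.add.at`. A small standalone sketch of that arithmetic (plain NumPy, outside the package):

```python
import numpy as np

user_ids = np.array([7, 7, 9, 9, 9])          # user behind each scored recommendation
scores = np.array([1.0, 0.0, 1.0, 1.0, 0.0])  # e.g. hit / no hit per recommendation

unique_users, inv = np.unique(user_ids, return_inverse=True)  # [7 9], inv = [0 0 1 1 1]

sum_scores = np.zeros(len(unique_users))
np.add.at(sum_scores, inv, scores)  # per-user sums: [1. 2.]

counts = np.zeros(len(unique_users))
np.add.at(counts, inv, 1)           # per-user counts: [2. 3.]

print(dict(zip(unique_users, sum_scores / counts)))  # {7: 0.5, 9: 0.666...}
```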
recnexteval/metrics/core/listwise_top_k.py
@@ -0,0 +1,72 @@
+import logging
+
+import numpy as np
+
+from .top_k import MetricTopK
+
+
+logger = logging.getLogger(__name__)
+
+
+class ListwiseMetricK(MetricTopK):
+    """Base class for all listwise metrics that can be calculated for every Top-K recommendation list,
+    i.e. one value for each user.
+    Examples are: PrecisionK, RecallK, DCGK, NDCGK.
+
+    :param K: Size of the recommendation list consisting of the Top-K item predictions.
+    :type K: int
+    """
+
+    @property
+    def micro_result(self) -> dict[str, np.ndarray]:
+        """User-level results for the metric.
+
+        Contains an entry for every user.
+
+        :return: The results with columns: user_id, score
+        :rtype: dict[str, np.ndarray]
+        """
+        if not self._is_computed:
+            raise ValueError("Metric has not been calculated yet.")
+        elif self._scores is None:
+            logger.warning(UserWarning("No scores were computed. Returning empty dict."))
+            return dict(zip(self.col_names, (np.array([]), np.array([]))))
+
+        scores = self._scores.toarray().reshape(-1)
+
+        unique_users, inv = np.unique(self._user_id_sequence_array, return_inverse=True)
+
+        # Sum of scores per user
+        sum_ones = np.zeros(len(unique_users))
+        np.add.at(sum_ones, inv, scores)
+
+        # Count per user
+        count_all = np.zeros(len(unique_users))
+        np.add.at(count_all, inv, 1)
+
+        # Aggregated score per user
+        agg_score = sum_ones / count_all
+
+        return dict(zip(self.col_names, (unique_users, agg_score)))
+
+    @property
+    def macro_result(self) -> None | float:
+        """Global metric value obtained by taking the average over all users.
+
+        :raises ValueError: If the metric has not been calculated yet.
+        :return: The global metric value.
+        :rtype: float, optional
+        """
+        if not self._is_computed:
+            raise ValueError("Metric has not been calculated yet.")
+        elif self._scores is None:
+            logger.warning(UserWarning("No scores were computed. Returning Null value."))
+            return None
+        elif self._scores.size == 0:
+            logger.warning(
+                UserWarning(
+                    f"All predictions were off or the ground truth matrix was empty during compute of {self.identifier}."
+                )
+            )
+            return 0
+        return self._scores.mean().item()
recnexteval/metrics/core/top_k.py
@@ -0,0 +1,60 @@
+import logging
+from warnings import warn
+
+import numpy as np
+from scipy.sparse import csr_matrix
+
+from ...algorithms.utils import get_top_K_ranks
+from .base import Metric
+
+
+logger = logging.getLogger(__name__)
+
+
+class MetricTopK(Metric):
+    """Base class for all metrics computed based on the Top-K recommendations for every user.
+
+    A MetricTopK object is stateful, i.e. after `calculate`
+    the results can be retrieved in one of two ways:
+
+    - Detailed results are stored in :attr:`results`,
+    - Aggregated result value can be retrieved using :attr:`value`
+
+    :param K: Size of the recommendation list consisting of the Top-K item predictions.
+    :type K: int
+    """
+
+    def __init__(
+        self,
+        user_id_sequence_array: np.ndarray,
+        user_item_shape: tuple[int, int],
+        timestamp_limit: None | int = None,
+        K: int = 10,
+    ) -> None:
+        super().__init__(
+            user_id_sequence_array=user_id_sequence_array,
+            user_item_shape=user_item_shape,
+            timestamp_limit=timestamp_limit,
+        )
+        if K is None:
+            warn(f"K not specified, using default value {K}.")
+        self.K = K
+
+    @property
+    def name(self) -> str:
+        """Name of the metric."""
+        return f"{super().name}_{self.K}"
+
+    @property
+    def params(self) -> dict[str, int | None]:
+        """Parameters of the metric."""
+        return super().params | {"K": self.K}
+
+    @property
+    def col_names(self) -> list[str]:
+        """The names of the columns in the results DataFrame."""
+        return ["user_id", "score"]
+
+    def prepare_matrix(self, y_true: csr_matrix, y_pred: csr_matrix) -> tuple[csr_matrix, csr_matrix]:
+        y_true, y_pred = super()._prepare_matrix(y_true, y_pred)
+        y_pred = get_top_K_ranks(y_pred, self.K)
+        return y_true, y_pred
recnexteval/metrics/core/util.py
@@ -0,0 +1,29 @@
+from scipy.sparse import csr_matrix
+
+
+def sparse_inverse_nonzero(a: csr_matrix) -> csr_matrix:
+    """Invert nonzero elements of a `scipy.sparse.csr_matrix`.
+
+    :param a: Matrix to invert.
+    :type a: csr_matrix
+    :return: Matrix with nonzero elements inverted.
+    :rtype: csr_matrix
+    """
+    inv_a = a.copy()
+    inv_a.data = 1 / inv_a.data
+    return inv_a
+
+
+def sparse_divide_nonzero(a: csr_matrix, b: csr_matrix) -> csr_matrix:
+    """Elementwise divide of nonzero elements of a by nonzero elements of b.
+
+    Elements that were zero in either a or b are zero in the resulting matrix.
+
+    :param a: Numerator.
+    :type a: csr_matrix
+    :param b: Denominator.
+    :type b: csr_matrix
+    :return: Result of the elementwise division of matrix a by matrix b.
+    :rtype: csr_matrix
+    """
+    return a.multiply(sparse_inverse_nonzero(b))
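A brief usage sketch for the two helpers above (standalone SciPy, assuming only the module path listed in this diff):

```python
import numpy as np
from scipy.sparse import csr_matrix

from recnexteval.metrics.core.util import sparse_divide_nonzero

a = csr_matrix(np.array([[2.0, 0.0], [3.0, 4.0]]))
b = csr_matrix(np.array([[4.0, 5.0], [0.0, 2.0]]))

# Division only happens where both inputs are nonzero; other positions stay zero.
print(sparse_divide_nonzero(a, b).toarray())
# [[0.5 0. ]
#  [0.  2. ]]
```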