recnexteval-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recnexteval/__init__.py +20 -0
- recnexteval/algorithms/__init__.py +99 -0
- recnexteval/algorithms/base.py +377 -0
- recnexteval/algorithms/baseline/__init__.py +10 -0
- recnexteval/algorithms/baseline/decay_popularity.py +110 -0
- recnexteval/algorithms/baseline/most_popular.py +72 -0
- recnexteval/algorithms/baseline/random.py +39 -0
- recnexteval/algorithms/baseline/recent_popularity.py +34 -0
- recnexteval/algorithms/itemknn/__init__.py +14 -0
- recnexteval/algorithms/itemknn/itemknn.py +119 -0
- recnexteval/algorithms/itemknn/itemknn_incremental.py +65 -0
- recnexteval/algorithms/itemknn/itemknn_incremental_movielens.py +95 -0
- recnexteval/algorithms/itemknn/itemknn_rolling.py +17 -0
- recnexteval/algorithms/itemknn/itemknn_static.py +31 -0
- recnexteval/algorithms/time_aware_item_knn/__init__.py +11 -0
- recnexteval/algorithms/time_aware_item_knn/base.py +248 -0
- recnexteval/algorithms/time_aware_item_knn/decay_functions.py +260 -0
- recnexteval/algorithms/time_aware_item_knn/ding_2005.py +52 -0
- recnexteval/algorithms/time_aware_item_knn/liu_2010.py +65 -0
- recnexteval/algorithms/time_aware_item_knn/similarity_functions.py +106 -0
- recnexteval/algorithms/time_aware_item_knn/top_k.py +61 -0
- recnexteval/algorithms/time_aware_item_knn/utils.py +47 -0
- recnexteval/algorithms/time_aware_item_knn/vaz_2013.py +50 -0
- recnexteval/algorithms/utils.py +51 -0
- recnexteval/datasets/__init__.py +109 -0
- recnexteval/datasets/base.py +316 -0
- recnexteval/datasets/config/__init__.py +113 -0
- recnexteval/datasets/config/amazon.py +188 -0
- recnexteval/datasets/config/base.py +72 -0
- recnexteval/datasets/config/lastfm.py +105 -0
- recnexteval/datasets/config/movielens.py +169 -0
- recnexteval/datasets/config/yelp.py +25 -0
- recnexteval/datasets/datasets/__init__.py +24 -0
- recnexteval/datasets/datasets/amazon.py +151 -0
- recnexteval/datasets/datasets/base.py +250 -0
- recnexteval/datasets/datasets/lastfm.py +121 -0
- recnexteval/datasets/datasets/movielens.py +93 -0
- recnexteval/datasets/datasets/test.py +46 -0
- recnexteval/datasets/datasets/yelp.py +103 -0
- recnexteval/datasets/metadata/__init__.py +58 -0
- recnexteval/datasets/metadata/amazon.py +68 -0
- recnexteval/datasets/metadata/base.py +38 -0
- recnexteval/datasets/metadata/lastfm.py +110 -0
- recnexteval/datasets/metadata/movielens.py +87 -0
- recnexteval/evaluators/__init__.py +189 -0
- recnexteval/evaluators/accumulator.py +167 -0
- recnexteval/evaluators/base.py +216 -0
- recnexteval/evaluators/builder/__init__.py +125 -0
- recnexteval/evaluators/builder/base.py +166 -0
- recnexteval/evaluators/builder/pipeline.py +111 -0
- recnexteval/evaluators/builder/stream.py +54 -0
- recnexteval/evaluators/evaluator_pipeline.py +287 -0
- recnexteval/evaluators/evaluator_stream.py +374 -0
- recnexteval/evaluators/state_management.py +310 -0
- recnexteval/evaluators/strategy.py +32 -0
- recnexteval/evaluators/util.py +124 -0
- recnexteval/matrix/__init__.py +48 -0
- recnexteval/matrix/exception.py +5 -0
- recnexteval/matrix/interaction_matrix.py +784 -0
- recnexteval/matrix/prediction_matrix.py +153 -0
- recnexteval/matrix/util.py +24 -0
- recnexteval/metrics/__init__.py +57 -0
- recnexteval/metrics/binary/__init__.py +4 -0
- recnexteval/metrics/binary/hit.py +49 -0
- recnexteval/metrics/core/__init__.py +10 -0
- recnexteval/metrics/core/base.py +126 -0
- recnexteval/metrics/core/elementwise_top_k.py +75 -0
- recnexteval/metrics/core/listwise_top_k.py +72 -0
- recnexteval/metrics/core/top_k.py +60 -0
- recnexteval/metrics/core/util.py +29 -0
- recnexteval/metrics/ranking/__init__.py +6 -0
- recnexteval/metrics/ranking/dcg.py +55 -0
- recnexteval/metrics/ranking/ndcg.py +78 -0
- recnexteval/metrics/ranking/precision.py +51 -0
- recnexteval/metrics/ranking/recall.py +42 -0
- recnexteval/models/__init__.py +4 -0
- recnexteval/models/base.py +69 -0
- recnexteval/preprocessing/__init__.py +37 -0
- recnexteval/preprocessing/filter.py +181 -0
- recnexteval/preprocessing/preprocessor.py +137 -0
- recnexteval/registries/__init__.py +67 -0
- recnexteval/registries/algorithm.py +68 -0
- recnexteval/registries/base.py +131 -0
- recnexteval/registries/dataset.py +37 -0
- recnexteval/registries/metric.py +57 -0
- recnexteval/settings/__init__.py +127 -0
- recnexteval/settings/base.py +414 -0
- recnexteval/settings/exception.py +8 -0
- recnexteval/settings/leave_n_out_setting.py +48 -0
- recnexteval/settings/processor.py +115 -0
- recnexteval/settings/schema.py +11 -0
- recnexteval/settings/single_time_point_setting.py +111 -0
- recnexteval/settings/sliding_window_setting.py +153 -0
- recnexteval/settings/splitters/__init__.py +14 -0
- recnexteval/settings/splitters/base.py +57 -0
- recnexteval/settings/splitters/n_last.py +39 -0
- recnexteval/settings/splitters/n_last_timestamp.py +76 -0
- recnexteval/settings/splitters/timestamp.py +82 -0
- recnexteval/settings/util.py +0 -0
- recnexteval/utils/__init__.py +115 -0
- recnexteval/utils/json_to_csv_converter.py +128 -0
- recnexteval/utils/logging_tools.py +159 -0
- recnexteval/utils/path.py +155 -0
- recnexteval/utils/url_certificate_installer.py +54 -0
- recnexteval/utils/util.py +166 -0
- recnexteval/utils/uuid_util.py +7 -0
- recnexteval/utils/yaml_tool.py +65 -0
- recnexteval-0.1.0.dist-info/METADATA +85 -0
- recnexteval-0.1.0.dist-info/RECORD +110 -0
- recnexteval-0.1.0.dist-info/WHEEL +4 -0
recnexteval/evaluators/accumulator.py
@@ -0,0 +1,167 @@
import logging
from collections import defaultdict
from typing import Optional

import pandas as pd

from recnexteval.metrics import Metric
from .util import MetricLevelEnum


logger = logging.getLogger(__name__)


class MetricAccumulator:
    def __init__(self) -> None:
        self.acc: defaultdict[str, dict[str, Metric]] = defaultdict(dict)

    def __getitem__(self, key) -> dict[str, Metric]:
        return self.acc[key]

    def add(self, metric: Metric, algorithm_name: str) -> None:
        """Add a metric to the accumulator

        Takes a :class:`Metric` object and adds it under the algorithm name. If
        the specified metric already exists for the algorithm, it will be
        overwritten with the new metric.

        :param metric: Metric to store
        :type metric: Metric
        :param algorithm_name: Name of the algorithm
        :type algorithm_name: str
        """
        if metric.identifier in self.acc[algorithm_name]:
            logger.warning(
                f"Metric {metric.identifier} already exists for algorithm {algorithm_name}. Overwriting..."
            )

        logger.debug(f"Metric {metric.identifier} created for algorithm {algorithm_name}")

        self.acc[algorithm_name][metric.identifier] = metric

    @property
    def user_level_metrics(self) -> defaultdict:
        results = defaultdict()
        for algo_name in self.acc:
            for metric_identifier in self.acc[algo_name]:
                metric = self.acc[algo_name][metric_identifier]
                results[(algo_name, f"t={metric.timestamp_limit}", metric.name)] = (
                    metric.micro_result
                )
        return results

    @property
    def window_level_metrics(self) -> defaultdict:
        results = defaultdict(dict)
        for algo_name in self.acc:
            for metric_identifier in self.acc[algo_name]:
                metric = self.acc[algo_name][metric_identifier]
                score = metric.macro_result
                num_user = metric.num_users
                if score == 0 and num_user == 0:
                    logger.info(
                        f"Metric {metric.name} for algorithm {algo_name} "
                        f"at t={metric.timestamp_limit} has 0 score and 0 users. "
                        "The ground truth may be empty due to no interactions occurring in that window."
                    )
                elif score == 0 and num_user != 0:
                    logger.info(
                        f"Metric {metric.name} for algorithm {algo_name} "
                        f"at t={metric.timestamp_limit} has 0 score but there are interactions. "
                        f"{algo_name} did not have any correct predictions."
                    )
                results[(algo_name, f"t={metric.timestamp_limit}", metric.name)]["score"] = score
                results[(algo_name, f"t={metric.timestamp_limit}", metric.name)]["num_user"] = (
                    num_user
                )
        return results

    def df_user_level_metric(self) -> pd.DataFrame:
        """User metric across all timestamps

        Computation of metrics evaluated on the user level

        :return: DataFrame of per-user scores indexed by (algorithm, timestamp, metric)
        :rtype: pd.DataFrame
        """
        df = pd.DataFrame.from_dict(self.user_level_metrics, orient="index").explode(
            ["user_id", "score"]
        )
        df = df.rename_axis(["algorithm", "timestamp", "metric"])
        df.rename(columns={"score": "user_score"}, inplace=True)
        return df

    def df_window_level_metric(self) -> pd.DataFrame:
        df = pd.DataFrame.from_dict(self.window_level_metrics, orient="index").explode(
            ["score", "num_user"]
        )
        df = df.rename_axis(["algorithm", "timestamp", "metric"])
        df.rename(columns={"score": "window_score"}, inplace=True)
        return df

    def df_macro_level_metric(self) -> pd.DataFrame:
        """Macro metric across all timestamps

        :return: DataFrame of scores per (algorithm, metric), averaged over windows
        :rtype: pd.DataFrame
        """
        df = pd.DataFrame.from_dict(self.window_level_metrics, orient="index").explode(
            ["score", "num_user"]
        )
        df = df.rename_axis(["algorithm", "timestamp", "metric"])
        result = df.groupby(["algorithm", "metric"]).mean()["score"].to_frame()
        result["num_window"] = df.groupby(["algorithm", "metric"]).count()["score"]
        result = result.rename(columns={"score": "macro_score"})
        return result

    def df_micro_level_metric(self) -> pd.DataFrame:
        """Micro metric across all timestamps

        :return: DataFrame of scores per (algorithm, metric), averaged over all users
        :rtype: pd.DataFrame
        """
        df = pd.DataFrame.from_dict(self.user_level_metrics, orient="index").explode(
            ["user_id", "score"]
        )
        df = df.rename_axis(["algorithm", "timestamp", "metric"])
        result = df.groupby(["algorithm", "metric"])["score"].mean().to_frame()
        result["num_user"] = df.groupby(["algorithm", "metric"])["score"].count()
        result = result.rename(columns={"score": "micro_score"})
        return result

    def df_metric(
        self,
        filter_timestamp: Optional[int] = None,
        filter_algo: Optional[str] = None,
        level: MetricLevelEnum = MetricLevelEnum.MACRO,
    ) -> pd.DataFrame:
        """Dataframe representation of the metric

        Returns a dataframe representation of the metric. The dataframe can be
        filtered based on the algorithm name and the timestamp.

        :param filter_timestamp: Timestamp value to filter on, defaults to None
        :type filter_timestamp: Optional[int], optional
        :param filter_algo: Algorithm name to filter on, defaults to None
        :type filter_algo: Optional[str], optional
        :param level: Level of the metric to compute, defaults to MetricLevelEnum.MACRO
        :type level: MetricLevelEnum, optional
        :return: Dataframe representation of the metric
        :rtype: pd.DataFrame
        """
        if level == MetricLevelEnum.MACRO:
            df = self.df_macro_level_metric()
        elif level == MetricLevelEnum.MICRO:
            df = self.df_micro_level_metric()
        elif level == MetricLevelEnum.WINDOW:
            df = self.df_window_level_metric()
        elif level == MetricLevelEnum.USER:
            df = self.df_user_level_metric()
        else:
            raise ValueError("Invalid level specified")

        if filter_algo:
            df = df.filter(like=filter_algo, axis=0)
        if filter_timestamp:
            df = df.filter(like=f"t={filter_timestamp}", axis=0)
        return df
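The accumulator views above reduce the same per-user scores at different granularities, and the macro and micro aggregates genuinely differ whenever windows contain different numbers of users. A minimal standalone sketch of that difference, using plain pandas and hypothetical per-user hit scores (not part of the package):

```python
import pandas as pd

# Hypothetical per-user HitK scores for one algorithm across two windows.
user_scores = pd.DataFrame(
    {
        "window": ["t=100", "t=100", "t=100", "t=200"],
        "user_id": [1, 2, 3, 4],
        "score": [1.0, 0.0, 1.0, 0.0],
    }
)

# Window level: average the users inside each window (what df_window_level_metric reports).
window_scores = user_scores.groupby("window")["score"].mean()  # t=100 -> 0.667, t=200 -> 0.0

# Macro level: average the window scores, weighting every window equally.
macro = window_scores.mean()  # (0.667 + 0.0) / 2 = 0.333

# Micro level: average over all (user, window) scores, weighting every user equally.
micro = user_scores["score"].mean()  # 2 / 4 = 0.5

print(f"macro={macro:.3f}, micro={micro:.3f}")
```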
recnexteval/evaluators/base.py
@@ -0,0 +1,216 @@
import logging
from typing import Literal

import pandas as pd
from scipy.sparse import csr_matrix

from ..matrix import PredictionMatrix
from ..registries import MetricEntry
from ..settings import EOWSettingError, Setting
from .accumulator import MetricAccumulator
from .util import MetricLevelEnum, UserItemBaseStatus


logger = logging.getLogger(__name__)


class EvaluatorBase(object):
    """Base class for evaluator.

    Provides the common methods and attributes for the evaluator classes. Should
    there be a need to create a new evaluator, it should inherit from this class.

    Args:
        metric_entries: List of metric entries to compute.
        setting: Setting object.
        metric_k: Value of K used by the metrics (top-K cutoff).
        ignore_unknown_user: Ignore unknown users, defaults to False.
        ignore_unknown_item: Ignore unknown items, defaults to False.
        seed: Random seed for reproducibility, defaults to 42.
    """

    def __init__(
        self,
        metric_entries: list[MetricEntry],
        setting: Setting,
        metric_k: int,
        ignore_unknown_user: bool = False,
        ignore_unknown_item: bool = False,
        seed: int = 42,
    ) -> None:
        self.metric_entries = metric_entries
        self.setting = setting
        """Setting to evaluate the algorithms on."""
        self.metric_k = metric_k
        """Value of K for the metrics."""
        self.ignore_unknown_user = ignore_unknown_user
        """To ignore unknown users during evaluation."""
        self.ignore_unknown_item = ignore_unknown_item
        """To ignore unknown items during evaluation."""

        self.user_item_base = UserItemBaseStatus()
        self.seed = seed
        self._run_step = 0
        self._acc: MetricAccumulator
        self._current_timestamp: int

    def _get_evaluation_data(self) -> tuple[PredictionMatrix, PredictionMatrix, int]:
        """Get the evaluation data for the current step.

        Internal method to get the evaluation data for the current step. The
        evaluation data consists of the unlabeled data, ground truth data, and
        the current timestamp, which will be returned as a tuple. The shapes
        are masked based on `user_item_base`. The unknown users in
        the ground truth data are also updated in `user_item_base`.

        Note:
            `_current_timestamp` is updated with the current timestamp.

        Returns:
            Tuple of unlabeled data, ground truth data, and current timestamp.

        Raises:
            EOWSettingError: If there is no more data to be processed.
        """
        try:
            split = self.setting.get_split_at(self._run_step)
            unlabeled_data = split.unlabeled
            ground_truth_data = split.ground_truth
            if split.t_window is None:
                raise ValueError("Timestamp of the current split cannot be None")
            self._current_timestamp = split.t_window

            unlabeled_data = PredictionMatrix.from_interaction_matrix(unlabeled_data)
            ground_truth_data = PredictionMatrix.from_interaction_matrix(ground_truth_data)
            self._run_step += 1
        except EOWSettingError:
            raise EOWSettingError("There is no more data to be processed, EOW reached")

        self.user_item_base.update_unknown_user_item_base(ground_truth_data)

        mask_shape = (self.user_item_base.known_shape[0], self.user_item_base.known_shape[1])
        if not self.ignore_unknown_user:
            mask_shape = (self.user_item_base.global_shape[0], mask_shape[1])

        unlabeled_data.mask_user_item_shape(
            shape=mask_shape
        )
        ground_truth_data.mask_user_item_shape(
            shape=mask_shape,
            drop_unknown_item=self.ignore_unknown_item,
            inherit_max_id=True,  # Ensures that shape of ground truth contains all user id that appears globally
        )
        # get the index of ground_truth_data._df
        if self.ignore_unknown_item:
            unlabeled_data._df = unlabeled_data._df.loc[ground_truth_data._df.index]
        return unlabeled_data, ground_truth_data, self._current_timestamp

    def _prediction_shape_handler(
        self, y_true: csr_matrix, y_pred: csr_matrix
    ) -> csr_matrix:
        """Handle shape difference of the prediction matrix.

        If there is a difference in the shape of the prediction matrix and the
        ground truth matrix, this function will handle the difference based on
        `ignore_unknown_user` and `ignore_unknown_item`.

        Args:
            y_true: Ground truth matrix.
            y_pred: Prediction matrix.

        Returns:
            The prediction matrix adjusted to be comparable with the ground truth matrix.
        """
        X_true_shape = y_true.shape
        if y_pred.shape != X_true_shape:
            logger.warning("Prediction matrix shape %s is different from ground truth matrix shape %s.", y_pred.shape, X_true_shape)
            # We cannot expect the algorithm to predict an unknown item, so we
            # only check user dimension
            if y_pred.shape[0] < X_true_shape[0] and not self.ignore_unknown_user:  # type: ignore
                raise ValueError(
                    "Prediction matrix shape, user dimension, is less than the ground truth matrix shape."
                )

            if not self.ignore_unknown_item:
                # prediction matrix would not contain unknown item ID
                # update the shape of the prediction matrix to include the ID
                y_pred = csr_matrix(
                    (y_pred.data, y_pred.indices, y_pred.indptr),
                    shape=(y_pred.shape[0], X_true_shape[1]),  # type: ignore
                )

            # shapes might not be the same in the case of dropping unknowns
            # from the ground truth data. We ensure that the same unknowns
            # are dropped from the predictions
            if self.ignore_unknown_user:
                y_pred = y_pred[: X_true_shape[0], :]  # type: ignore
            if self.ignore_unknown_item:
                y_pred = y_pred[:, : X_true_shape[1]]  # type: ignore

        return y_pred

    def metric_results(
        self,
        level: MetricLevelEnum | Literal["macro", "micro", "window", "user"] = MetricLevelEnum.MACRO,
        only_current_timestamp: None | bool = False,
        filter_timestamp: None | int = None,
        filter_algo: None | str = None,
    ) -> pd.DataFrame:
        """Results of the metrics computed.

        Computes the metrics of all algorithms based on the level specified and
        returns the results in a pandas DataFrame. The results can be filtered
        based on the algorithm name and the current timestamp.

        Specifics
        ---------
        - User level: User level metrics computed across all timestamps.
        - Window level: Window level metrics computed across all timestamps. This can
          be viewed as a macro level metric in the context of a single window, where
          the scores of each user are averaged within the window.
        - Macro level: Macro level metrics computed for the entire timeline. This
          score is computed by averaging the scores of all windows, treating each
          window equally.
        - Micro level: Micro level metrics computed for the entire timeline. This
          score is computed by averaging the scores of all users, treating each
          user and the timestamp the user is in as a unique contribution to the
          overall score.

        Args:
            level: Level of the metric to compute, defaults to "macro".
            only_current_timestamp: Filter only the current timestamp, defaults to False.
            filter_timestamp: Timestamp value to filter on, defaults to None.
                If both `only_current_timestamp` and `filter_timestamp` are provided,
                a ValueError is raised.
            filter_algo: Algorithm name to filter on, defaults to None.

        Returns:
            Dataframe representation of the metric.
        """
        if isinstance(level, str) and not MetricLevelEnum.has_value(level):
            raise ValueError("Invalid level specified")
        level = MetricLevelEnum(level)

        if only_current_timestamp and filter_timestamp:
            raise ValueError("Cannot specify both only_current_timestamp and filter_timestamp.")

        timestamp = None
        if only_current_timestamp:
            timestamp = self._current_timestamp

        if filter_timestamp:
            timestamp = filter_timestamp

        return self._acc.df_metric(filter_algo=filter_algo, filter_timestamp=timestamp, level=level)

    def restore(self) -> None:
        """Restore the generators after unpickling.

        This method is used to restore the generators after loading the object
        from a pickle file.
        """
        self.setting.restore(self._run_step)
        logger.debug("Generators restored")

    def current_step(self) -> int:
        """Return the current step of the evaluator.

        Returns:
            Current step of the evaluator.
        """
        return self._run_step
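The shape handling in `_prediction_shape_handler` is the subtle part of this file: unknown-item columns are padded into the prediction matrix by rebuilding it with a wider shape, while unknown users and items are dropped by slicing. A small standalone sketch of those two adjustments on a scipy CSR matrix, using hypothetical scores (not part of the package):

```python
import numpy as np
from scipy.sparse import csr_matrix

# Hypothetical prediction scores: 3 known users x 4 known items.
y_pred = csr_matrix(np.array([
    [0.9, 0.0, 0.3, 0.0],
    [0.0, 0.7, 0.0, 0.2],
    [0.1, 0.0, 0.0, 0.8],
]))
true_shape = (3, 6)  # the ground-truth window introduced 2 items the model never scored

# Keeping unknown items: pad the predictions with empty columns by rebuilding the
# CSR matrix with the wider shape (the stored values are untouched).
padded = csr_matrix(
    (y_pred.data, y_pred.indices, y_pred.indptr),
    shape=(y_pred.shape[0], true_shape[1]),
)

# Ignoring unknown users/items instead: slice the predictions down to the
# ground-truth shape so both matrices align row-for-row and column-for-column.
truncated = padded[: true_shape[0], : true_shape[1]]
assert padded.shape == true_shape and truncated.shape == true_shape
```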
recnexteval/evaluators/builder/__init__.py
@@ -0,0 +1,125 @@
"""Builder module for constructing evaluator objects.

This module provides builder classes for constructing evaluator objects in the
RecNextEval library. Builders follow the builder pattern to facilitate the
construction of evaluators with proper validation and error checking.

## Builder Overview

The builder pattern is used to construct complex evaluator objects step by step.
Builders ensure that all necessary components (settings, metrics, algorithms)
are properly configured before building the evaluator, preventing runtime errors.

## Available Builders

- `Builder`: Abstract base class for all builder implementations
- `EvaluatorPipelineBuilder`: Builder for pipeline evaluators that evaluate
  multiple algorithms on static data
- `EvaluatorStreamerBuilder`: Builder for streaming evaluators that evaluate
  algorithms on streaming data

## Using Builders

### Basic Pipeline Evaluation

To evaluate multiple algorithms on a static dataset using a pipeline evaluator:

```python
from recnexteval.evaluators.builder import EvaluatorPipelineBuilder
from recnexteval.settings import Setting
from recnexteval.datasets import AmazonMusicDataset

# Load dataset
dataset = AmazonMusicDataset()
data = dataset.load()

# Create setting
setting = Setting(data=data, top_K=10)
setting.split()

# Build evaluator
builder = EvaluatorPipelineBuilder(seed=42)
builder.add_setting(setting)
builder.set_metric_K(10)
builder.add_metric("PrecisionK")
builder.add_metric("RecallK")
builder.add_algorithm("MostPopular")
builder.add_algorithm("RecentPop", params={"K": 10})

evaluator = builder.build()
results = evaluator.evaluate()
```

### Streaming Evaluation

To evaluate algorithms on streaming data:

```python
from recnexteval.evaluators.builder import EvaluatorStreamerBuilder
from recnexteval.settings import StreamingSetting
from recnexteval.datasets import AmazonMusicDataset

# Load dataset
dataset = AmazonMusicDataset()
data = dataset.load()

# Create streaming setting
setting = StreamingSetting(data=data, top_K=10, window_size=1000)
setting.split()

# Build streaming evaluator
builder = EvaluatorStreamerBuilder(seed=42)
builder.add_setting(setting)
builder.set_metric_K(10)
builder.add_metric("HitK")
builder.add_metric("NDCGK")

evaluator = builder.build()
# The evaluator can now process streaming data
```

### Advanced Configuration

Builders support advanced configuration options:

```python
from recnexteval.evaluators.builder import EvaluatorPipelineBuilder

builder = EvaluatorPipelineBuilder(
    ignore_unknown_user=False,  # Don't ignore unknown users
    ignore_unknown_item=True,  # Ignore unknown items
    seed=123
)

builder.add_setting(setting)
builder.set_metric_K(20)

# Add multiple metrics
metrics = ["PrecisionK", "RecallK", "DCGK", "NDCGK", "HitK"]
for metric in metrics:
    builder.add_metric(metric)

# Add algorithms with custom parameters
builder.add_algorithm("ItemKNN", params={"K": 50, "similarity": "cosine"})
builder.add_algorithm("DecayPop", params={"decay_factor": 0.9})

evaluator = builder.build()
```

## Extending the Framework

To create custom builders, inherit from the `Builder` base class and implement
the `build()` method. Ensure to call `super().__init__()` and implement proper
validation in `_check_ready()`.
"""

from .base import Builder
from .pipeline import EvaluatorPipelineBuilder
from .stream import EvaluatorStreamerBuilder


__all__ = [
    "Builder",
    "EvaluatorPipelineBuilder",
    "EvaluatorStreamerBuilder",
]
recnexteval/evaluators/builder/base.py
@@ -0,0 +1,166 @@
import logging
from abc import ABC, abstractmethod
from warnings import warn

from recnexteval.registries import (
    METRIC_REGISTRY,
    MetricEntry,
)
from recnexteval.settings import Setting
from recnexteval.utils import arg_to_str
from ..base import EvaluatorBase


logger = logging.getLogger(__name__)


class Builder(ABC):
    """Base class for Builder objects.

    Provides methods to set specific values for the builder and enforce checks
    such that the builder can be constructed correctly and to avoid possible
    errors when the builder is executed.
    """

    def __init__(
        self,
        ignore_unknown_user: bool = True,
        ignore_unknown_item: bool = True,
        seed: int = 42,
    ) -> None:
        """Initialize the Builder.

        Args:
            ignore_unknown_user: Ignore unknown user in the evaluation.
            ignore_unknown_item: Ignore unknown item in the evaluation.
            seed: Random seed for reproducibility.
        """
        self.metric_entries: dict[str, MetricEntry] = dict()
        """dict of metrics to evaluate algorithm on.
        Using dict instead of list for fast lookup"""
        self.setting: Setting
        """Setting to evaluate the algorithms on"""
        self.ignore_unknown_user = ignore_unknown_user
        """Ignore unknown user in the evaluation"""
        self.ignore_unknown_item = ignore_unknown_item
        """Ignore unknown item in the evaluation"""
        self.metric_k: int
        self.seed: int = seed

    def _check_setting_exist(self) -> bool:
        """Check if setting is already set.

        Returns:
            True if setting is set, False otherwise.
        """
        return not (not hasattr(self, "setting") or self.setting is None)

    def set_metric_K(self, K: int) -> None:
        """Set K value for all metrics.

        Args:
            K: K value to set for all metrics.
        """
        self.metric_k = K

    def add_metric(self, metric: str | type) -> None:
        """Add metric to evaluate algorithm on.

        Metric will be added to the metric_entries dict where it will later be
        converted to a list when the evaluator is constructed.

        Note:
            If K is not yet specified, the setting's top_K value will be used. This
            requires the setting to be set before adding the metric.

        Args:
            metric: Metric to evaluate algorithm on.

        Raises:
            ValueError: If metric is not found in METRIC_REGISTRY.
            RuntimeError: If setting is not set.
        """
        if not self._check_setting_exist():
            raise RuntimeError(
                "Setting has not been set. To ensure conformity of the addition of"
                " other components, please set the setting first. Call the add_setting() method."
            )

        metric = arg_to_str(metric)

        if metric not in METRIC_REGISTRY:
            raise ValueError(f"Metric {metric} could not be resolved.")

        if not hasattr(self, "metric_k"):
            self.metric_k = self.setting.top_K
            warn(
                "K value not yet specified before setting metric, using setting's top_K value."
                " We recommend specifying K value for metric. If you want to change the K value,"
                " you can clear all metric entries and set the K value before adding metrics."
            )

        metric_name = f"{metric}_{self.metric_k}"
        if metric_name in self.metric_entries:
            logger.warning(f"Metric {metric_name} already exists. Skipping adding metric.")
            return

        self.metric_entries[metric_name] = MetricEntry(metric, self.metric_k)

    def add_setting(self, setting: Setting) -> None:
        """Add setting to the evaluator builder.

        Note:
            The setting should be set before adding metrics or algorithms
            to the evaluator.

        Args:
            setting: Setting to evaluate the algorithms on.

        Raises:
            ValueError: If setting is not an instance of Setting.
        """
        if not isinstance(setting, Setting):
            raise ValueError(f"setting should be of type Setting, got {type(setting)}")
        if hasattr(self, "setting") and self.setting is not None:
            warn("Setting is already set. Continuing will overwrite the setting.")

        self.setting = setting

    def clear_metrics(self) -> None:
        """Clear all metrics from the builder."""
        self.metric_entries.clear()

    def _check_ready(self) -> None:
        """Check if the builder is ready to construct Evaluator.

        Raises:
            RuntimeError: If there are invalid configurations.
        """
        if not hasattr(self, "metric_k"):
            self.metric_k = self.setting.top_K
            warn(
                "K value not yet specified before setting metric, using setting's top_K value."
                " We recommend specifying K value for metric. If you want to change the K value,"
                " you can clear all metric entries and set the K value before adding metrics."
            )

        if len(self.metric_entries) == 0:
            raise RuntimeError("No metrics specified, can't construct Evaluator")

        # Check for settings #
        if self.setting is None:
            raise RuntimeError("No settings specified, can't construct Evaluator")
        if not self.setting.is_ready:
            raise RuntimeError(
                "Setting is not ready, can't construct Evaluator. "
                "Call split() on the setting first."
            )

    @abstractmethod
    def build(self) -> EvaluatorBase:
        """Build object.

        Raises:
            NotImplementedError: If the method is not implemented.
        """
        raise NotImplementedError
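Concrete builders such as `EvaluatorPipelineBuilder` and `EvaluatorStreamerBuilder` implement `build()` on top of this base class. As a rough illustration only, a hypothetical subclass could validate and assemble an evaluator from the collected state as sketched below; an actual builder would return its own concrete evaluator type rather than `EvaluatorBase`:

```python
from recnexteval.evaluators.base import EvaluatorBase
from recnexteval.evaluators.builder import Builder


class MyEvaluatorBuilder(Builder):
    """Hypothetical builder used only to illustrate the contract."""

    def build(self) -> EvaluatorBase:
        # Validate the collected configuration first; raises RuntimeError if
        # metrics are missing or the setting has not been split yet.
        self._check_ready()
        return EvaluatorBase(
            metric_entries=list(self.metric_entries.values()),
            setting=self.setting,
            metric_k=self.metric_k,
            ignore_unknown_user=self.ignore_unknown_user,
            ignore_unknown_item=self.ignore_unknown_item,
            seed=self.seed,
        )
```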