recnexteval-0.1.0-py3-none-any.whl

Files changed (110)
  1. recnexteval/__init__.py +20 -0
  2. recnexteval/algorithms/__init__.py +99 -0
  3. recnexteval/algorithms/base.py +377 -0
  4. recnexteval/algorithms/baseline/__init__.py +10 -0
  5. recnexteval/algorithms/baseline/decay_popularity.py +110 -0
  6. recnexteval/algorithms/baseline/most_popular.py +72 -0
  7. recnexteval/algorithms/baseline/random.py +39 -0
  8. recnexteval/algorithms/baseline/recent_popularity.py +34 -0
  9. recnexteval/algorithms/itemknn/__init__.py +14 -0
  10. recnexteval/algorithms/itemknn/itemknn.py +119 -0
  11. recnexteval/algorithms/itemknn/itemknn_incremental.py +65 -0
  12. recnexteval/algorithms/itemknn/itemknn_incremental_movielens.py +95 -0
  13. recnexteval/algorithms/itemknn/itemknn_rolling.py +17 -0
  14. recnexteval/algorithms/itemknn/itemknn_static.py +31 -0
  15. recnexteval/algorithms/time_aware_item_knn/__init__.py +11 -0
  16. recnexteval/algorithms/time_aware_item_knn/base.py +248 -0
  17. recnexteval/algorithms/time_aware_item_knn/decay_functions.py +260 -0
  18. recnexteval/algorithms/time_aware_item_knn/ding_2005.py +52 -0
  19. recnexteval/algorithms/time_aware_item_knn/liu_2010.py +65 -0
  20. recnexteval/algorithms/time_aware_item_knn/similarity_functions.py +106 -0
  21. recnexteval/algorithms/time_aware_item_knn/top_k.py +61 -0
  22. recnexteval/algorithms/time_aware_item_knn/utils.py +47 -0
  23. recnexteval/algorithms/time_aware_item_knn/vaz_2013.py +50 -0
  24. recnexteval/algorithms/utils.py +51 -0
  25. recnexteval/datasets/__init__.py +109 -0
  26. recnexteval/datasets/base.py +316 -0
  27. recnexteval/datasets/config/__init__.py +113 -0
  28. recnexteval/datasets/config/amazon.py +188 -0
  29. recnexteval/datasets/config/base.py +72 -0
  30. recnexteval/datasets/config/lastfm.py +105 -0
  31. recnexteval/datasets/config/movielens.py +169 -0
  32. recnexteval/datasets/config/yelp.py +25 -0
  33. recnexteval/datasets/datasets/__init__.py +24 -0
  34. recnexteval/datasets/datasets/amazon.py +151 -0
  35. recnexteval/datasets/datasets/base.py +250 -0
  36. recnexteval/datasets/datasets/lastfm.py +121 -0
  37. recnexteval/datasets/datasets/movielens.py +93 -0
  38. recnexteval/datasets/datasets/test.py +46 -0
  39. recnexteval/datasets/datasets/yelp.py +103 -0
  40. recnexteval/datasets/metadata/__init__.py +58 -0
  41. recnexteval/datasets/metadata/amazon.py +68 -0
  42. recnexteval/datasets/metadata/base.py +38 -0
  43. recnexteval/datasets/metadata/lastfm.py +110 -0
  44. recnexteval/datasets/metadata/movielens.py +87 -0
  45. recnexteval/evaluators/__init__.py +189 -0
  46. recnexteval/evaluators/accumulator.py +167 -0
  47. recnexteval/evaluators/base.py +216 -0
  48. recnexteval/evaluators/builder/__init__.py +125 -0
  49. recnexteval/evaluators/builder/base.py +166 -0
  50. recnexteval/evaluators/builder/pipeline.py +111 -0
  51. recnexteval/evaluators/builder/stream.py +54 -0
  52. recnexteval/evaluators/evaluator_pipeline.py +287 -0
  53. recnexteval/evaluators/evaluator_stream.py +374 -0
  54. recnexteval/evaluators/state_management.py +310 -0
  55. recnexteval/evaluators/strategy.py +32 -0
  56. recnexteval/evaluators/util.py +124 -0
  57. recnexteval/matrix/__init__.py +48 -0
  58. recnexteval/matrix/exception.py +5 -0
  59. recnexteval/matrix/interaction_matrix.py +784 -0
  60. recnexteval/matrix/prediction_matrix.py +153 -0
  61. recnexteval/matrix/util.py +24 -0
  62. recnexteval/metrics/__init__.py +57 -0
  63. recnexteval/metrics/binary/__init__.py +4 -0
  64. recnexteval/metrics/binary/hit.py +49 -0
  65. recnexteval/metrics/core/__init__.py +10 -0
  66. recnexteval/metrics/core/base.py +126 -0
  67. recnexteval/metrics/core/elementwise_top_k.py +75 -0
  68. recnexteval/metrics/core/listwise_top_k.py +72 -0
  69. recnexteval/metrics/core/top_k.py +60 -0
  70. recnexteval/metrics/core/util.py +29 -0
  71. recnexteval/metrics/ranking/__init__.py +6 -0
  72. recnexteval/metrics/ranking/dcg.py +55 -0
  73. recnexteval/metrics/ranking/ndcg.py +78 -0
  74. recnexteval/metrics/ranking/precision.py +51 -0
  75. recnexteval/metrics/ranking/recall.py +42 -0
  76. recnexteval/models/__init__.py +4 -0
  77. recnexteval/models/base.py +69 -0
  78. recnexteval/preprocessing/__init__.py +37 -0
  79. recnexteval/preprocessing/filter.py +181 -0
  80. recnexteval/preprocessing/preprocessor.py +137 -0
  81. recnexteval/registries/__init__.py +67 -0
  82. recnexteval/registries/algorithm.py +68 -0
  83. recnexteval/registries/base.py +131 -0
  84. recnexteval/registries/dataset.py +37 -0
  85. recnexteval/registries/metric.py +57 -0
  86. recnexteval/settings/__init__.py +127 -0
  87. recnexteval/settings/base.py +414 -0
  88. recnexteval/settings/exception.py +8 -0
  89. recnexteval/settings/leave_n_out_setting.py +48 -0
  90. recnexteval/settings/processor.py +115 -0
  91. recnexteval/settings/schema.py +11 -0
  92. recnexteval/settings/single_time_point_setting.py +111 -0
  93. recnexteval/settings/sliding_window_setting.py +153 -0
  94. recnexteval/settings/splitters/__init__.py +14 -0
  95. recnexteval/settings/splitters/base.py +57 -0
  96. recnexteval/settings/splitters/n_last.py +39 -0
  97. recnexteval/settings/splitters/n_last_timestamp.py +76 -0
  98. recnexteval/settings/splitters/timestamp.py +82 -0
  99. recnexteval/settings/util.py +0 -0
  100. recnexteval/utils/__init__.py +115 -0
  101. recnexteval/utils/json_to_csv_converter.py +128 -0
  102. recnexteval/utils/logging_tools.py +159 -0
  103. recnexteval/utils/path.py +155 -0
  104. recnexteval/utils/url_certificate_installer.py +54 -0
  105. recnexteval/utils/util.py +166 -0
  106. recnexteval/utils/uuid_util.py +7 -0
  107. recnexteval/utils/yaml_tool.py +65 -0
  108. recnexteval-0.1.0.dist-info/METADATA +85 -0
  109. recnexteval-0.1.0.dist-info/RECORD +110 -0
  110. recnexteval-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,167 @@
+ import logging
+ from collections import defaultdict
+ from typing import Optional
+
+ import pandas as pd
+
+ from recnexteval.metrics import Metric
+ from .util import MetricLevelEnum
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class MetricAccumulator:
+     def __init__(self) -> None:
+         self.acc: defaultdict[str, dict[str, Metric]] = defaultdict(dict)
+
+     def __getitem__(self, key) -> dict[str, Metric]:
+         return self.acc[key]
+
+     def add(self, metric: Metric, algorithm_name: str) -> None:
+         """Add a metric to the accumulator.
+
+         Takes a :class:`Metric` object and stores it under the algorithm name.
+         If the metric already exists for that algorithm, it is overwritten
+         with the new metric.
+
+         :param metric: Metric to store
+         :type metric: Metric
+         :param algorithm_name: Name of the algorithm
+         :type algorithm_name: str
+         """
+         if metric.identifier in self.acc[algorithm_name]:
+             logger.warning(
+                 f"Metric {metric.identifier} already exists for algorithm {algorithm_name}. Overwriting..."
+             )
+
+         logger.debug(f"Metric {metric.identifier} created for algorithm {algorithm_name}")
+
+         self.acc[algorithm_name][metric.identifier] = metric
+
+     @property
+     def user_level_metrics(self) -> defaultdict:
+         results = defaultdict()
+         for algo_name in self.acc:
+             for metric_identifier in self.acc[algo_name]:
+                 metric = self.acc[algo_name][metric_identifier]
+                 results[(algo_name, f"t={metric.timestamp_limit}", metric.name)] = (
+                     metric.micro_result
+                 )
+         return results
+
+     @property
+     def window_level_metrics(self) -> defaultdict:
+         results = defaultdict(dict)
+         for algo_name in self.acc:
+             for metric_identifier in self.acc[algo_name]:
+                 metric = self.acc[algo_name][metric_identifier]
+                 score = metric.macro_result
+                 num_user = metric.num_users
+                 if score == 0 and num_user == 0:
+                     logger.info(
+                         f"Metric {metric.name} for algorithm {algo_name} "
+                         f"at t={metric.timestamp_limit} has 0 score and 0 users. "
+                         "The ground truth may be empty because no interactions occurred in that window."
+                     )
+                 elif score == 0 and num_user != 0:
+                     logger.info(
+                         f"Metric {metric.name} for algorithm {algo_name} "
+                         f"at t={metric.timestamp_limit} has 0 score even though there are interactions. "
+                         f"{algo_name} did not make any correct predictions."
+                     )
+                 results[(algo_name, f"t={metric.timestamp_limit}", metric.name)]["score"] = score
+                 results[(algo_name, f"t={metric.timestamp_limit}", metric.name)]["num_user"] = (
+                     num_user
+                 )
+         return results
+
+     def df_user_level_metric(self) -> pd.DataFrame:
+         """User-level metric scores across all timestamps.
+
+         Metrics evaluated on the user level, one row per
+         (algorithm, timestamp, metric, user).
+
+         :return: DataFrame of per-user scores, indexed by algorithm, timestamp, and metric
+         :rtype: pd.DataFrame
+         """
+         df = pd.DataFrame.from_dict(self.user_level_metrics, orient="index").explode(
+             ["user_id", "score"]
+         )
+         df = df.rename_axis(["algorithm", "timestamp", "metric"])
+         df.rename(columns={"score": "user_score"}, inplace=True)
+         return df
+
+     def df_window_level_metric(self) -> pd.DataFrame:
+         """Window-level metric scores, one row per (algorithm, timestamp, metric)."""
+         df = pd.DataFrame.from_dict(self.window_level_metrics, orient="index").explode(
+             ["score", "num_user"]
+         )
+         df = df.rename_axis(["algorithm", "timestamp", "metric"])
+         df.rename(columns={"score": "window_score"}, inplace=True)
+         return df
+
+     def df_macro_level_metric(self) -> pd.DataFrame:
+         """Macro metric across all timestamps.
+
+         Averages the window-level scores per (algorithm, metric), weighting
+         each window equally.
+
+         :return: DataFrame with the macro score and the number of windows
+         :rtype: pd.DataFrame
+         """
+         df = pd.DataFrame.from_dict(self.window_level_metrics, orient="index").explode(
+             ["score", "num_user"]
+         )
+         df = df.rename_axis(["algorithm", "timestamp", "metric"])
+         result = df.groupby(["algorithm", "metric"]).mean()["score"].to_frame()
+         result["num_window"] = df.groupby(["algorithm", "metric"]).count()["score"]
+         result = result.rename(columns={"score": "macro_score"})
+         return result
+
+     def df_micro_level_metric(self) -> pd.DataFrame:
+         """Micro metric across all timestamps.
+
+         Averages the user-level scores per (algorithm, metric), weighting
+         each (user, timestamp) contribution equally.
+
+         :return: DataFrame with the micro score and the number of users
+         :rtype: pd.DataFrame
+         """
+         df = pd.DataFrame.from_dict(self.user_level_metrics, orient="index").explode(
+             ["user_id", "score"]
+         )
+         df = df.rename_axis(["algorithm", "timestamp", "metric"])
+         result = df.groupby(["algorithm", "metric"])["score"].mean().to_frame()
+         result["num_user"] = df.groupby(["algorithm", "metric"])["score"].count()
+         result = result.rename(columns={"score": "micro_score"})
+         return result
+
+     def df_metric(
+         self,
+         filter_timestamp: Optional[int] = None,
+         filter_algo: Optional[str] = None,
+         level: MetricLevelEnum = MetricLevelEnum.MACRO,
+     ) -> pd.DataFrame:
+         """DataFrame representation of the metric.
+
+         Returns a DataFrame representation of the metric. The DataFrame can be
+         filtered by algorithm name and timestamp.
+
+         :param filter_timestamp: Timestamp value to filter on, defaults to None
+         :type filter_timestamp: Optional[int], optional
+         :param filter_algo: Algorithm name to filter on, defaults to None
+         :type filter_algo: Optional[str], optional
+         :param level: Level of the metric to compute, defaults to MetricLevelEnum.MACRO
+         :type level: MetricLevelEnum, optional
+         :return: DataFrame representation of the metric
+         :rtype: pd.DataFrame
+         """
+         if level == MetricLevelEnum.MACRO:
+             df = self.df_macro_level_metric()
+         elif level == MetricLevelEnum.MICRO:
+             df = self.df_micro_level_metric()
+         elif level == MetricLevelEnum.WINDOW:
+             df = self.df_window_level_metric()
+         elif level == MetricLevelEnum.USER:
+             df = self.df_user_level_metric()
+         else:
+             raise ValueError("Invalid level specified")
+
+         if filter_algo:
+             df = df.filter(like=filter_algo, axis=0)
+         if filter_timestamp:
+             df = df.filter(like=f"t={filter_timestamp}", axis=0)
+         return df
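How the accumulator is consumed is easiest to see in isolation. Below is a minimal sketch, not part of the package: `FakeMetric` is a hypothetical stand-in that mimics only the `Metric` attributes `MetricAccumulator` actually reads (`identifier`, `name`, `timestamp_limit`, `micro_result`, `macro_result`, `num_users`), and the import paths are assumed from the module layout above.

```python
from recnexteval.evaluators.accumulator import MetricAccumulator
from recnexteval.evaluators.util import MetricLevelEnum


class FakeMetric:
    """Hypothetical stub exposing only the attributes the accumulator reads."""

    def __init__(self, name: str, timestamp_limit: int, user_scores: dict[int, float]) -> None:
        self.name = name
        self.timestamp_limit = timestamp_limit
        self.identifier = f"{name}_t{timestamp_limit}"
        self._scores = user_scores

    @property
    def micro_result(self) -> dict:
        # Per-user results, in the shape df_user_level_metric() explodes.
        return {"user_id": list(self._scores), "score": list(self._scores.values())}

    @property
    def macro_result(self) -> float:
        return sum(self._scores.values()) / len(self._scores)

    @property
    def num_users(self) -> int:
        return len(self._scores)


acc = MetricAccumulator()
acc.add(FakeMetric("NDCG_10", 100, {1: 0.5, 2: 0.7}), algorithm_name="MostPopular")
acc.add(FakeMetric("NDCG_10", 200, {1: 1.0, 3: 0.0, 4: 0.5}), algorithm_name="MostPopular")

print(acc.df_metric())                            # macro: mean of window means, (0.6 + 0.5) / 2
print(acc.df_metric(level=MetricLevelEnum.USER))  # one row per (user, window)
```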
@@ -0,0 +1,216 @@
+ import logging
+ from typing import Literal
+
+ import pandas as pd
+ from scipy.sparse import csr_matrix
+
+ from ..matrix import PredictionMatrix
+ from ..registries import MetricEntry
+ from ..settings import EOWSettingError, Setting
+ from .accumulator import MetricAccumulator
+ from .util import MetricLevelEnum, UserItemBaseStatus
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class EvaluatorBase:
+     """Base class for evaluators.
+
+     Provides the common methods and attributes for the evaluator classes.
+     New evaluators should inherit from this class.
+
+     Args:
+         metric_entries: List of metric entries to compute.
+         setting: Setting object.
+         metric_k: Value of K for the metrics.
+         ignore_unknown_user: Ignore unknown users, defaults to False.
+         ignore_unknown_item: Ignore unknown items, defaults to False.
+         seed: Random seed for reproducibility, defaults to 42.
+     """
+
+     def __init__(
+         self,
+         metric_entries: list[MetricEntry],
+         setting: Setting,
+         metric_k: int,
+         ignore_unknown_user: bool = False,
+         ignore_unknown_item: bool = False,
+         seed: int = 42,
+     ) -> None:
+         self.metric_entries = metric_entries
+         self.setting = setting
+         """Setting to evaluate the algorithms on."""
+         self.metric_k = metric_k
+         """Value of K for the metrics."""
+         self.ignore_unknown_user = ignore_unknown_user
+         """Whether to ignore unknown users during evaluation."""
+         self.ignore_unknown_item = ignore_unknown_item
+         """Whether to ignore unknown items during evaluation."""
+
+         self.user_item_base = UserItemBaseStatus()
+         self.seed = seed
+         self._run_step = 0
+         self._acc: MetricAccumulator
+         self._current_timestamp: int
+
+     def _get_evaluation_data(self) -> tuple[PredictionMatrix, PredictionMatrix, int]:
+         """Get the evaluation data for the current step.
+
+         Internal method to get the evaluation data for the current step. The
+         evaluation data consists of the unlabeled data, the ground truth data,
+         and the current timestamp, returned as a tuple. The shapes are masked
+         based on `user_item_base`, and the unknown users in the ground truth
+         data are also recorded in `user_item_base`.
+
+         Note:
+             `_current_timestamp` is updated with the current timestamp.
+
+         Returns:
+             Tuple of unlabeled data, ground truth data, and current timestamp.
+
+         Raises:
+             EOWSettingError: If there is no more data to be processed.
+         """
+         try:
+             split = self.setting.get_split_at(self._run_step)
+             unlabeled_data = split.unlabeled
+             ground_truth_data = split.ground_truth
+             if split.t_window is None:
+                 raise ValueError("Timestamp of the current split cannot be None")
+             self._current_timestamp = split.t_window
+
+             unlabeled_data = PredictionMatrix.from_interaction_matrix(unlabeled_data)
+             ground_truth_data = PredictionMatrix.from_interaction_matrix(ground_truth_data)
+             self._run_step += 1
+         except EOWSettingError:
+             raise EOWSettingError("There is no more data to be processed, EOW reached")
+
+         self.user_item_base.update_unknown_user_item_base(ground_truth_data)
+
+         mask_shape = (self.user_item_base.known_shape[0], self.user_item_base.known_shape[1])
+         if not self.ignore_unknown_user:
+             mask_shape = (self.user_item_base.global_shape[0], mask_shape[1])
+
+         unlabeled_data.mask_user_item_shape(shape=mask_shape)
+         ground_truth_data.mask_user_item_shape(
+             shape=mask_shape,
+             drop_unknown_item=self.ignore_unknown_item,
+             inherit_max_id=True,  # ensures the ground truth shape covers every user ID that appears globally
+         )
+         # Align the unlabeled data with the (possibly filtered) ground truth index
+         if self.ignore_unknown_item:
+             unlabeled_data._df = unlabeled_data._df.loc[ground_truth_data._df.index]
+         return unlabeled_data, ground_truth_data, self._current_timestamp
+
+     def _prediction_shape_handler(
+         self, y_true: csr_matrix, y_pred: csr_matrix
+     ) -> csr_matrix:
+         """Handle shape differences between prediction and ground truth matrices.
+
+         If the shape of the prediction matrix differs from the shape of the
+         ground truth matrix, this method reconciles the difference based on
+         `ignore_unknown_user` and `ignore_unknown_item`.
+
+         Args:
+             y_true: Ground truth matrix.
+             y_pred: Prediction matrix.
+
+         Returns:
+             Prediction matrix reshaped to match the ground truth matrix.
+         """
+         X_true_shape = y_true.shape
+         if y_pred.shape != X_true_shape:
+             logger.warning(
+                 "Prediction matrix shape %s is different from ground truth matrix shape %s.",
+                 y_pred.shape,
+                 X_true_shape,
+             )
+             # We cannot expect the algorithm to predict an unknown item, so we
+             # only check the user dimension
+             if y_pred.shape[0] < X_true_shape[0] and not self.ignore_unknown_user:  # type: ignore
+                 raise ValueError(
+                     "Prediction matrix shape, user dimension, is less than the ground truth matrix shape."
+                 )
+
+             if not self.ignore_unknown_item:
+                 # The prediction matrix would not contain unknown item IDs;
+                 # widen its shape to include those IDs
+                 y_pred = csr_matrix(
+                     (y_pred.data, y_pred.indices, y_pred.indptr),
+                     shape=(y_pred.shape[0], X_true_shape[1]),  # type: ignore
+                 )
+
+             # Shapes might still differ when unknowns are dropped from the
+             # ground truth data; drop the same unknowns from the predictions
+             if self.ignore_unknown_user:
+                 y_pred = y_pred[: X_true_shape[0], :]  # type: ignore
+             if self.ignore_unknown_item:
+                 y_pred = y_pred[:, : X_true_shape[1]]  # type: ignore
+
+         return y_pred
+
+     def metric_results(
+         self,
+         level: MetricLevelEnum | Literal["macro", "micro", "window", "user"] = MetricLevelEnum.MACRO,
+         only_current_timestamp: None | bool = False,
+         filter_timestamp: None | int = None,
+         filter_algo: None | str = None,
+     ) -> pd.DataFrame:
+         """Results of the metrics computed.
+
+         Computes the metrics of all algorithms at the specified level and
+         returns the results in a pandas DataFrame. The results can be filtered
+         by algorithm name and by timestamp.
+
+         Specifics
+         ---------
+         - User level: User-level metrics computed across all timestamps.
+         - Window level: Window-level metrics computed across all timestamps.
+           This can be viewed as a macro-level metric within a single window,
+           where the scores of all users in the window are averaged.
+         - Macro level: Macro-level metrics computed over the entire timeline.
+           The score is the average over all window scores, treating each
+           window equally.
+         - Micro level: Micro-level metrics computed over the entire timeline.
+           The score is the average over all user scores, treating each
+           (user, timestamp) pair as a unique contribution to the overall score.
+
+         Args:
+             level: Level of the metric to compute, defaults to "macro".
+             only_current_timestamp: Filter only the current timestamp, defaults to False.
+             filter_timestamp: Timestamp value to filter on, defaults to None.
+                 Only one of `only_current_timestamp` and `filter_timestamp`
+                 may be specified.
+             filter_algo: Algorithm name to filter on, defaults to None.
+
+         Returns:
+             DataFrame representation of the metric.
+         """
+         if isinstance(level, str) and not MetricLevelEnum.has_value(level):
+             raise ValueError("Invalid level specified")
+         level = MetricLevelEnum(level)
+
+         if only_current_timestamp and filter_timestamp:
+             raise ValueError("Cannot specify both only_current_timestamp and filter_timestamp.")
+
+         timestamp = None
+         if only_current_timestamp:
+             timestamp = self._current_timestamp
+
+         if filter_timestamp:
+             timestamp = filter_timestamp
+
+         return self._acc.df_metric(filter_algo=filter_algo, filter_timestamp=timestamp, level=level)
+
+     def restore(self) -> None:
+         """Restore the generators after unpickling.
+
+         This method is used to restore the generators after loading the object
+         from a pickle file.
+         """
+         self.setting.restore(self._run_step)
+         logger.debug("Generators restored")
+
+     def current_step(self) -> int:
+         """Return the current step of the evaluator.
+
+         Returns:
+             Current step of the evaluator.
+         """
+         return self._run_step
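The macro/micro distinction documented in `metric_results` is easiest to see with concrete numbers. A self-contained illustration in plain Python (the scores are invented for the example and independent of the library):

```python
# Per-user scores from two evaluation windows (values are illustrative).
window_scores = {
    "t=100": [1.0, 0.0],            # two users
    "t=200": [1.0, 1.0, 1.0, 0.0],  # four users
}

# Window level: one mean per window.
window_means = {t: sum(s) / len(s) for t, s in window_scores.items()}
# {'t=100': 0.5, 't=200': 0.75}

# Macro level: average the window means; every window counts equally.
macro = sum(window_means.values()) / len(window_means)  # 0.625

# Micro level: pool all (user, window) scores; every user-window pair counts equally.
pooled = [s for scores in window_scores.values() for s in scores]
micro = sum(pooled) / len(pooled)  # 4/6 = 0.667

print(window_means, macro, micro)
```

The two aggregates diverge exactly when windows contain different numbers of users: macro lets the sparse `t=100` window pull the score down, while micro weights it by its two users only.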
@@ -0,0 +1,125 @@
+ """Builder module for constructing evaluator objects.
+
+ This module provides builder classes for constructing evaluator objects in the
+ RecNextEval library. Builders follow the builder pattern to facilitate the
+ construction of evaluators with proper validation and error checking.
+
+ ## Builder Overview
+
+ The builder pattern is used to construct complex evaluator objects step by step.
+ Builders ensure that all necessary components (settings, metrics, algorithms)
+ are properly configured before building the evaluator, preventing runtime errors.
+
+ ## Available Builders
+
+ - `Builder`: Abstract base class for all builder implementations
+ - `EvaluatorPipelineBuilder`: Builder for pipeline evaluators that evaluate
+   multiple algorithms on static data
+ - `EvaluatorStreamerBuilder`: Builder for streaming evaluators that evaluate
+   algorithms on streaming data
+
+ ## Using Builders
+
+ ### Basic Pipeline Evaluation
+
+ To evaluate multiple algorithms on a static dataset using a pipeline evaluator:
+
+ ```python
+ from recnexteval.evaluators.builder import EvaluatorPipelineBuilder
+ from recnexteval.settings import Setting
+ from recnexteval.datasets import AmazonMusicDataset
+
+ # Load dataset
+ dataset = AmazonMusicDataset()
+ data = dataset.load()
+
+ # Create setting
+ setting = Setting(data=data, top_K=10)
+ setting.split()
+
+ # Build evaluator
+ builder = EvaluatorPipelineBuilder(seed=42)
+ builder.add_setting(setting)
+ builder.set_metric_K(10)
+ builder.add_metric("PrecisionK")
+ builder.add_metric("RecallK")
+ builder.add_algorithm("MostPopular")
+ builder.add_algorithm("RecentPop", params={"K": 10})
+
+ evaluator = builder.build()
+ results = evaluator.evaluate()
+ ```
+
+ ### Streaming Evaluation
+
+ To evaluate algorithms on streaming data:
+
+ ```python
+ from recnexteval.evaluators.builder import EvaluatorStreamerBuilder
+ from recnexteval.settings import StreamingSetting
+ from recnexteval.datasets import AmazonMusicDataset
+
+ # Load dataset
+ dataset = AmazonMusicDataset()
+ data = dataset.load()
+
+ # Create streaming setting
+ setting = StreamingSetting(data=data, top_K=10, window_size=1000)
+ setting.split()
+
+ # Build streaming evaluator
+ builder = EvaluatorStreamerBuilder(seed=42)
+ builder.add_setting(setting)
+ builder.set_metric_K(10)
+ builder.add_metric("HitK")
+ builder.add_metric("NDCGK")
+
+ evaluator = builder.build()
+ # The evaluator can now process streaming data
+ ```
+
+ ### Advanced Configuration
+
+ Builders support advanced configuration options:
+
+ ```python
+ from recnexteval.evaluators.builder import EvaluatorPipelineBuilder
+
+ builder = EvaluatorPipelineBuilder(
+     ignore_unknown_user=False,  # Don't ignore unknown users
+     ignore_unknown_item=True,   # Ignore unknown items
+     seed=123,
+ )
+
+ builder.add_setting(setting)
+ builder.set_metric_K(20)
+
+ # Add multiple metrics
+ metrics = ["PrecisionK", "RecallK", "DCGK", "NDCGK", "HitK"]
+ for metric in metrics:
+     builder.add_metric(metric)
+
+ # Add algorithms with custom parameters
+ builder.add_algorithm("ItemKNN", params={"K": 50, "similarity": "cosine"})
+ builder.add_algorithm("DecayPop", params={"decay_factor": 0.9})
+
+ evaluator = builder.build()
+ ```
+
+ ## Extending the Framework
+
+ To create custom builders, inherit from the `Builder` base class and implement
+ the `build()` method. Be sure to call `super().__init__()` and implement proper
+ validation in `_check_ready()`; a sketch follows the `builder/base.py` diff below.
+ """
+
+ from .base import Builder
+ from .pipeline import EvaluatorPipelineBuilder
+ from .stream import EvaluatorStreamerBuilder
+
+
+ __all__ = [
+     "Builder",
+     "EvaluatorPipelineBuilder",
+     "EvaluatorStreamerBuilder",
+ ]
@@ -0,0 +1,166 @@
+ import logging
+ from abc import ABC, abstractmethod
+ from warnings import warn
+
+ from recnexteval.registries import (
+     METRIC_REGISTRY,
+     MetricEntry,
+ )
+ from recnexteval.settings import Setting
+ from recnexteval.utils import arg_to_str
+ from ..base import EvaluatorBase
+
+
+ logger = logging.getLogger(__name__)
+
+
+ class Builder(ABC):
+     """Base class for Builder objects.
+
+     Provides methods to set specific values for the builder and enforces
+     checks so that the builder is configured correctly, avoiding possible
+     errors when the builder is executed.
+     """
+
+     def __init__(
+         self,
+         ignore_unknown_user: bool = True,
+         ignore_unknown_item: bool = True,
+         seed: int = 42,
+     ) -> None:
+         """Initialize the Builder.
+
+         Args:
+             ignore_unknown_user: Ignore unknown users in the evaluation.
+             ignore_unknown_item: Ignore unknown items in the evaluation.
+             seed: Random seed for reproducibility.
+         """
+         self.metric_entries: dict[str, MetricEntry] = dict()
+         """Dict of metrics to evaluate the algorithms on.
+         A dict is used instead of a list for fast lookup."""
+         self.setting: Setting
+         """Setting to evaluate the algorithms on."""
+         self.ignore_unknown_user = ignore_unknown_user
+         """Ignore unknown users in the evaluation."""
+         self.ignore_unknown_item = ignore_unknown_item
+         """Ignore unknown items in the evaluation."""
+         self.metric_k: int
+         self.seed: int = seed
+
+     def _check_setting_exist(self) -> bool:
+         """Check if the setting is already set.
+
+         Returns:
+             True if the setting is set, False otherwise.
+         """
+         return hasattr(self, "setting") and self.setting is not None
+
+     def set_metric_K(self, K: int) -> None:
+         """Set the K value for all metrics.
+
+         Args:
+             K: K value to set for all metrics.
+         """
+         self.metric_k = K
+
+     def add_metric(self, metric: str | type) -> None:
+         """Add a metric to evaluate the algorithms on.
+
+         The metric is added to the metric_entries dict, which is converted
+         to a list when the evaluator is constructed.
+
+         Note:
+             If K is not yet specified, the setting's top_K value will be used.
+             This requires the setting to be set before adding the metric.
+
+         Args:
+             metric: Metric to evaluate the algorithms on.
+
+         Raises:
+             ValueError: If the metric is not found in METRIC_REGISTRY.
+             RuntimeError: If the setting is not set.
+         """
+         if not self._check_setting_exist():
+             raise RuntimeError(
+                 "Setting has not been set. To keep the other components consistent,"
+                 " please set the setting first by calling the add_setting() method."
+             )
+
+         metric = arg_to_str(metric)
+
+         if metric not in METRIC_REGISTRY:
+             raise ValueError(f"Metric {metric} could not be resolved.")
+
+         if not hasattr(self, "metric_k"):
+             self.metric_k = self.setting.top_K
+             warn(
+                 "K value not specified before adding a metric; using the setting's top_K value."
+                 " We recommend specifying the K value for the metric. To change the K value,"
+                 " clear all metric entries and set the K value before adding metrics."
+             )
+
+         metric_name = f"{metric}_{self.metric_k}"
+         if metric_name in self.metric_entries:
+             logger.warning(f"Metric {metric_name} already exists. Skipping adding metric.")
+             return
+
+         self.metric_entries[metric_name] = MetricEntry(metric, self.metric_k)
+
+     def add_setting(self, setting: Setting) -> None:
+         """Add a setting to the evaluator builder.
+
+         Note:
+             The setting should be set before adding metrics or algorithms
+             to the evaluator.
+
+         Args:
+             setting: Setting to evaluate the algorithms on.
+
+         Raises:
+             ValueError: If setting is not an instance of Setting.
+         """
+         if not isinstance(setting, Setting):
+             raise ValueError(f"setting should be of type Setting, got {type(setting)}")
+         if hasattr(self, "setting") and self.setting is not None:
+             warn("Setting is already set. Continuing will overwrite the setting.")
+
+         self.setting = setting
+
+     def clear_metrics(self) -> None:
+         """Clear all metrics from the builder."""
+         self.metric_entries.clear()
+
+     def _check_ready(self) -> None:
+         """Check if the builder is ready to construct the Evaluator.
+
+         Raises:
+             RuntimeError: If there are invalid configurations.
+         """
+         if not hasattr(self, "metric_k"):
+             self.metric_k = self.setting.top_K
+             warn(
+                 "K value not specified before adding a metric; using the setting's top_K value."
+                 " We recommend specifying the K value for the metric. To change the K value,"
+                 " clear all metric entries and set the K value before adding metrics."
+             )
+
+         if len(self.metric_entries) == 0:
+             raise RuntimeError("No metrics specified, can't construct Evaluator")
+
+         # Check the setting #
+         if not self._check_setting_exist():
+             raise RuntimeError("No setting specified, can't construct Evaluator")
+         if not self.setting.is_ready:
+             raise RuntimeError(
+                 "Setting is not ready, can't construct Evaluator. "
+                 "Call split() on the setting first."
+             )
+
+     @abstractmethod
+     def build(self) -> EvaluatorBase:
+         """Build the evaluator object.
+
+         Raises:
+             NotImplementedError: If the method is not implemented.
+         """
+         raise NotImplementedError
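The builder module docstring points subclass authors at `build()` and `_check_ready()`. A minimal sketch of that extension point, assuming the `EvaluatorBase.__init__` signature shown in its diff above; `MyEvaluator` and `MyEvaluatorBuilder` are hypothetical names, not part of the package:

```python
from recnexteval.evaluators.base import EvaluatorBase
from recnexteval.evaluators.builder.base import Builder


class MyEvaluator(EvaluatorBase):
    """Hypothetical evaluator; inherits everything from EvaluatorBase."""


class MyEvaluatorBuilder(Builder):
    """Hypothetical builder wiring the base class's collected state into an evaluator."""

    def build(self) -> EvaluatorBase:
        # Reuse the base-class validation: metrics present, setting set and split.
        self._check_ready()
        return MyEvaluator(
            metric_entries=list(self.metric_entries.values()),
            setting=self.setting,
            metric_k=self.metric_k,
            ignore_unknown_user=self.ignore_unknown_user,
            ignore_unknown_item=self.ignore_unknown_item,
            seed=self.seed,
        )
```

Because `Builder.__init__` is not overridden here, the inherited defaults (`ignore_unknown_user=True`, `ignore_unknown_item=True`, `seed=42`) apply unless the caller passes their own values.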