recnexteval 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recnexteval/__init__.py +20 -0
- recnexteval/algorithms/__init__.py +99 -0
- recnexteval/algorithms/base.py +377 -0
- recnexteval/algorithms/baseline/__init__.py +10 -0
- recnexteval/algorithms/baseline/decay_popularity.py +110 -0
- recnexteval/algorithms/baseline/most_popular.py +72 -0
- recnexteval/algorithms/baseline/random.py +39 -0
- recnexteval/algorithms/baseline/recent_popularity.py +34 -0
- recnexteval/algorithms/itemknn/__init__.py +14 -0
- recnexteval/algorithms/itemknn/itemknn.py +119 -0
- recnexteval/algorithms/itemknn/itemknn_incremental.py +65 -0
- recnexteval/algorithms/itemknn/itemknn_incremental_movielens.py +95 -0
- recnexteval/algorithms/itemknn/itemknn_rolling.py +17 -0
- recnexteval/algorithms/itemknn/itemknn_static.py +31 -0
- recnexteval/algorithms/time_aware_item_knn/__init__.py +11 -0
- recnexteval/algorithms/time_aware_item_knn/base.py +248 -0
- recnexteval/algorithms/time_aware_item_knn/decay_functions.py +260 -0
- recnexteval/algorithms/time_aware_item_knn/ding_2005.py +52 -0
- recnexteval/algorithms/time_aware_item_knn/liu_2010.py +65 -0
- recnexteval/algorithms/time_aware_item_knn/similarity_functions.py +106 -0
- recnexteval/algorithms/time_aware_item_knn/top_k.py +61 -0
- recnexteval/algorithms/time_aware_item_knn/utils.py +47 -0
- recnexteval/algorithms/time_aware_item_knn/vaz_2013.py +50 -0
- recnexteval/algorithms/utils.py +51 -0
- recnexteval/datasets/__init__.py +109 -0
- recnexteval/datasets/base.py +316 -0
- recnexteval/datasets/config/__init__.py +113 -0
- recnexteval/datasets/config/amazon.py +188 -0
- recnexteval/datasets/config/base.py +72 -0
- recnexteval/datasets/config/lastfm.py +105 -0
- recnexteval/datasets/config/movielens.py +169 -0
- recnexteval/datasets/config/yelp.py +25 -0
- recnexteval/datasets/datasets/__init__.py +24 -0
- recnexteval/datasets/datasets/amazon.py +151 -0
- recnexteval/datasets/datasets/base.py +250 -0
- recnexteval/datasets/datasets/lastfm.py +121 -0
- recnexteval/datasets/datasets/movielens.py +93 -0
- recnexteval/datasets/datasets/test.py +46 -0
- recnexteval/datasets/datasets/yelp.py +103 -0
- recnexteval/datasets/metadata/__init__.py +58 -0
- recnexteval/datasets/metadata/amazon.py +68 -0
- recnexteval/datasets/metadata/base.py +38 -0
- recnexteval/datasets/metadata/lastfm.py +110 -0
- recnexteval/datasets/metadata/movielens.py +87 -0
- recnexteval/evaluators/__init__.py +189 -0
- recnexteval/evaluators/accumulator.py +167 -0
- recnexteval/evaluators/base.py +216 -0
- recnexteval/evaluators/builder/__init__.py +125 -0
- recnexteval/evaluators/builder/base.py +166 -0
- recnexteval/evaluators/builder/pipeline.py +111 -0
- recnexteval/evaluators/builder/stream.py +54 -0
- recnexteval/evaluators/evaluator_pipeline.py +287 -0
- recnexteval/evaluators/evaluator_stream.py +374 -0
- recnexteval/evaluators/state_management.py +310 -0
- recnexteval/evaluators/strategy.py +32 -0
- recnexteval/evaluators/util.py +124 -0
- recnexteval/matrix/__init__.py +48 -0
- recnexteval/matrix/exception.py +5 -0
- recnexteval/matrix/interaction_matrix.py +784 -0
- recnexteval/matrix/prediction_matrix.py +153 -0
- recnexteval/matrix/util.py +24 -0
- recnexteval/metrics/__init__.py +57 -0
- recnexteval/metrics/binary/__init__.py +4 -0
- recnexteval/metrics/binary/hit.py +49 -0
- recnexteval/metrics/core/__init__.py +10 -0
- recnexteval/metrics/core/base.py +126 -0
- recnexteval/metrics/core/elementwise_top_k.py +75 -0
- recnexteval/metrics/core/listwise_top_k.py +72 -0
- recnexteval/metrics/core/top_k.py +60 -0
- recnexteval/metrics/core/util.py +29 -0
- recnexteval/metrics/ranking/__init__.py +6 -0
- recnexteval/metrics/ranking/dcg.py +55 -0
- recnexteval/metrics/ranking/ndcg.py +78 -0
- recnexteval/metrics/ranking/precision.py +51 -0
- recnexteval/metrics/ranking/recall.py +42 -0
- recnexteval/models/__init__.py +4 -0
- recnexteval/models/base.py +69 -0
- recnexteval/preprocessing/__init__.py +37 -0
- recnexteval/preprocessing/filter.py +181 -0
- recnexteval/preprocessing/preprocessor.py +137 -0
- recnexteval/registries/__init__.py +67 -0
- recnexteval/registries/algorithm.py +68 -0
- recnexteval/registries/base.py +131 -0
- recnexteval/registries/dataset.py +37 -0
- recnexteval/registries/metric.py +57 -0
- recnexteval/settings/__init__.py +127 -0
- recnexteval/settings/base.py +414 -0
- recnexteval/settings/exception.py +8 -0
- recnexteval/settings/leave_n_out_setting.py +48 -0
- recnexteval/settings/processor.py +115 -0
- recnexteval/settings/schema.py +11 -0
- recnexteval/settings/single_time_point_setting.py +111 -0
- recnexteval/settings/sliding_window_setting.py +153 -0
- recnexteval/settings/splitters/__init__.py +14 -0
- recnexteval/settings/splitters/base.py +57 -0
- recnexteval/settings/splitters/n_last.py +39 -0
- recnexteval/settings/splitters/n_last_timestamp.py +76 -0
- recnexteval/settings/splitters/timestamp.py +82 -0
- recnexteval/settings/util.py +0 -0
- recnexteval/utils/__init__.py +115 -0
- recnexteval/utils/json_to_csv_converter.py +128 -0
- recnexteval/utils/logging_tools.py +159 -0
- recnexteval/utils/path.py +155 -0
- recnexteval/utils/url_certificate_installer.py +54 -0
- recnexteval/utils/util.py +166 -0
- recnexteval/utils/uuid_util.py +7 -0
- recnexteval/utils/yaml_tool.py +65 -0
- recnexteval-0.1.0.dist-info/METADATA +85 -0
- recnexteval-0.1.0.dist-info/RECORD +110 -0
- recnexteval-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
|
|
3
|
+
from .state_management import AlgorithmStateManager
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class EvaluationStrategy(ABC):
    """Interface for the different evaluation modes.

    Concrete strategies implement :meth:`should_advance_window` to decide
    when the evaluation moves on to the next window.
    """

    @abstractmethod
    def should_advance_window(self, algo_state_mgr: AlgorithmStateManager, current_step: int, total_steps: int) -> bool:
        """Decide whether the evaluation should advance to the next window.

        Args:
            algo_state_mgr: Algorithm state manager the strategy may consult.
            current_step: Current step of the evaluation.
            total_steps: Total number of steps of the evaluation.

        Returns:
            Whether to advance. This abstract declaration has no body of its
            own; subclasses provide the actual decision.
        """
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SlidingWindowStrategy(EvaluationStrategy):
    """Window-advance policy used for sliding window evaluation."""

    def should_advance_window(self, algo_state_mgr: AlgorithmStateManager, current_step: int, total_steps: int) -> bool:
        """Advance only when every check passes and steps remain.

        The checks are evaluated in order and stop at the first failure:
        ``is_all_predicted()``, then ``is_all_same_data_segment()``, then
        ``current_step < total_steps``.

        Args:
            algo_state_mgr: Algorithm state manager queried for the checks.
            current_step: Current step of the evaluation.
            total_steps: Total number of steps of the evaluation.

        Returns:
            Truthy only if all three conditions hold.
        """
        everyone_predicted = algo_state_mgr.is_all_predicted()
        if not everyone_predicted:
            # Hand back the falsy result itself, as a chained ``and`` would.
            return everyone_predicted

        same_segment = algo_state_mgr.is_all_same_data_segment()
        if not same_segment:
            return same_segment

        return current_step < total_steps
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class SingleTimePointStrategy(EvaluationStrategy):
    """Strategy for single time point evaluation."""

    def should_advance_window(self, algo_state_mgr: AlgorithmStateManager, current_step: int, total_steps: int) -> bool:
        """Advance when all algorithms have predicted and steps remain.

        Args:
            algo_state_mgr: Algorithm state manager queried via
                ``is_all_predicted()``.
            current_step: Current step of the evaluation.
            total_steps: Total number of steps of the evaluation.

        Returns:
            Truthy only if ``is_all_predicted()`` holds and
            ``current_step < total_steps``.
        """
        return algo_state_mgr.is_all_predicted() and current_step < total_steps
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from enum import StrEnum
|
|
4
|
+
|
|
5
|
+
from recnexteval.matrix import InteractionMatrix
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MetricLevelEnum(StrEnum):
|
|
12
|
+
MICRO = "micro"
|
|
13
|
+
MACRO = "macro"
|
|
14
|
+
WINDOW = "window"
|
|
15
|
+
USER = "user"
|
|
16
|
+
|
|
17
|
+
@classmethod
|
|
18
|
+
def has_value(cls, value: str) -> bool:
|
|
19
|
+
"""Check valid value for MetricLevelEnum.
|
|
20
|
+
|
|
21
|
+
Args:
|
|
22
|
+
value: String value input.
|
|
23
|
+
|
|
24
|
+
Returns:
|
|
25
|
+
Whether the value is valid.
|
|
26
|
+
"""
|
|
27
|
+
return value in MetricLevelEnum
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class UserItemBaseStatus:
    """Unknown and known user/item base.

    Stores the sets of known and unknown user and item ids and provides
    methods to update and reset them.

    Attributes:
        unknown_user: Ids of users currently considered unknown.
        known_user: Ids of users currently considered known.
        unknown_item: Ids of items currently considered unknown.
        known_item: Ids of items currently considered known.
    """

    unknown_user: set[int] = field(default_factory=set)
    known_user: set[int] = field(default_factory=set)
    unknown_item: set[int] = field(default_factory=set)
    known_item: set[int] = field(default_factory=set)

    @property
    def known_shape(self) -> tuple[int, int]:
        """Known shape of the user-item interaction matrix.

        This is the shape of the user/item interaction matrix released to
        the algorithm. It relies on the dataset assumption that IDs
        increment in the order of time.

        Returns:
            Tuple of (|user|, |item|).
        """
        return (len(self.known_user), len(self.known_item))

    @property
    def global_shape(self) -> tuple[int, int]:
        """Global shape of the user-item interaction matrix.

        This is the shape of the user-item interaction matrix considering
        all the users and items that have possibly been exposed. It accounts
        for unknown user/item ids that are exposed during the prediction
        stage when they are requested for prediction on the algorithm.

        Returns:
            Tuple of (|user|, |item|).
        """
        # Count distinct ids through the unions so an id that is present in
        # both the known and unknown sets is not counted twice; this keeps
        # the shape consistent with global_user_ids / global_item_ids.
        return (len(self.global_user_ids), len(self.global_item_ids))

    @property
    def global_user_ids(self) -> set[int]:
        """Set of global user ids.

        The global user ids are the union of known and unknown user ids.

        Returns:
            Set of global user ids (a new set).
        """
        return self.known_user.union(self.unknown_user)

    @property
    def global_item_ids(self) -> set[int]:
        """Set of global item ids.

        The global item ids are the union of known and unknown item ids.

        Returns:
            Set of global item ids (a new set).
        """
        return self.known_item.union(self.unknown_item)

    def update_known_user_item_base(self, data: InteractionMatrix) -> None:
        """Add the user and item ids of the data to the known sets.

        Args:
            data: Data whose ``user_ids`` and ``item_ids`` are added to the
                known user and item sets.
        """
        self.known_item.update(data.item_ids)
        self.known_user.update(data.user_ids)

    def update_unknown_user_item_base(self, data: InteractionMatrix) -> None:
        """Set the unknown user and item sets from the data.

        The unknown sets are replaced (not extended) by the ids of the data
        that are not in the corresponding known set.

        Args:
            data: Data whose ``user_ids`` and ``item_ids`` are compared
                against the known user and item sets.
        """
        self.unknown_user = data.user_ids.difference(self.known_user)
        self.unknown_item = data.item_ids.difference(self.known_item)

    def reset_unknown_user_item_base(self) -> None:
        """Clear the unknown user and item sets.

        This method should be called after Phase 3, when the data release
        is done.
        """
        self.unknown_user = set()
        self.unknown_item = set()
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""
|
|
2
|
+
recnexteval.matrix
|
|
3
|
+
|
|
4
|
+
This module provides classes and utilities for handling interaction matrices in recommendation systems.
|
|
5
|
+
|
|
6
|
+
The core component is the InteractionMatrix, which represents user-item interactions as a structured matrix.
|
|
7
|
+
It stores interaction data in a pandas DataFrame and offers methods for filtering, masking, and converting to sparse matrices.
|
|
8
|
+
This is essential for building and evaluating recommender algorithms, such as collaborative filtering, where interactions
|
|
9
|
+
between users and items need to be efficiently processed.
|
|
10
|
+
|
|
11
|
+
Use cases include:
|
|
12
|
+
- Preprocessing interaction data for training recommendation models.
|
|
13
|
+
- Handling temporal data with timestamp-based filtering (e.g., recent interactions).
|
|
14
|
+
- Masking unknown users/items during evaluation to prevent data leakage.
|
|
15
|
+
- Converting data to CSR format for efficient matrix operations in libraries like SciPy.
|
|
16
|
+
|
|
17
|
+
Classes:
|
|
18
|
+
InteractionMatrix: The main class for creating and manipulating interaction matrices from datasets.
|
|
19
|
+
It supports operations like filtering by users/items, timestamps, and shape masking.
|
|
20
|
+
PredictionMatrix: A subclass of InteractionMatrix tailored for prediction-related operations.
|
|
21
|
+
It provides masking for the (user, item) pairs expected to be exposed.
|
|
22
|
+
|
|
23
|
+
Enums:
|
|
24
|
+
ItemUserBasedEnum: Enum for specifying whether operations are item-based or user-based.
|
|
25
|
+
Used in methods that group or filter data by users or items.
|
|
26
|
+
|
|
27
|
+
Exceptions:
|
|
28
|
+
TimestampAttributeMissingError: Raised when required timestamp attributes are missing from the data.
|
|
29
|
+
Ensures that time-aware operations are only performed on timestamped data.
|
|
30
|
+
|
|
31
|
+
Functions:
|
|
32
|
+
to_csr_matrix: Utility function to convert data structures to CSR matrix format.
|
|
33
|
+
Useful for creating sparse representations of interaction data for computational efficiency.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
from .exception import TimestampAttributeMissingError
|
|
37
|
+
from .interaction_matrix import InteractionMatrix, ItemUserBasedEnum
|
|
38
|
+
from .prediction_matrix import PredictionMatrix
|
|
39
|
+
from .util import to_csr_matrix
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Names exported by ``from recnexteval.matrix import *``.
__all__ = [
    # Matrix classes
    "InteractionMatrix", "PredictionMatrix",
    # Conversion helper
    "to_csr_matrix",
    # Enum and exception
    "ItemUserBasedEnum", "TimestampAttributeMissingError",
]
|