recnexteval 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. recnexteval/__init__.py +20 -0
  2. recnexteval/algorithms/__init__.py +99 -0
  3. recnexteval/algorithms/base.py +377 -0
  4. recnexteval/algorithms/baseline/__init__.py +10 -0
  5. recnexteval/algorithms/baseline/decay_popularity.py +110 -0
  6. recnexteval/algorithms/baseline/most_popular.py +72 -0
  7. recnexteval/algorithms/baseline/random.py +39 -0
  8. recnexteval/algorithms/baseline/recent_popularity.py +34 -0
  9. recnexteval/algorithms/itemknn/__init__.py +14 -0
  10. recnexteval/algorithms/itemknn/itemknn.py +119 -0
  11. recnexteval/algorithms/itemknn/itemknn_incremental.py +65 -0
  12. recnexteval/algorithms/itemknn/itemknn_incremental_movielens.py +95 -0
  13. recnexteval/algorithms/itemknn/itemknn_rolling.py +17 -0
  14. recnexteval/algorithms/itemknn/itemknn_static.py +31 -0
  15. recnexteval/algorithms/time_aware_item_knn/__init__.py +11 -0
  16. recnexteval/algorithms/time_aware_item_knn/base.py +248 -0
  17. recnexteval/algorithms/time_aware_item_knn/decay_functions.py +260 -0
  18. recnexteval/algorithms/time_aware_item_knn/ding_2005.py +52 -0
  19. recnexteval/algorithms/time_aware_item_knn/liu_2010.py +65 -0
  20. recnexteval/algorithms/time_aware_item_knn/similarity_functions.py +106 -0
  21. recnexteval/algorithms/time_aware_item_knn/top_k.py +61 -0
  22. recnexteval/algorithms/time_aware_item_knn/utils.py +47 -0
  23. recnexteval/algorithms/time_aware_item_knn/vaz_2013.py +50 -0
  24. recnexteval/algorithms/utils.py +51 -0
  25. recnexteval/datasets/__init__.py +109 -0
  26. recnexteval/datasets/base.py +316 -0
  27. recnexteval/datasets/config/__init__.py +113 -0
  28. recnexteval/datasets/config/amazon.py +188 -0
  29. recnexteval/datasets/config/base.py +72 -0
  30. recnexteval/datasets/config/lastfm.py +105 -0
  31. recnexteval/datasets/config/movielens.py +169 -0
  32. recnexteval/datasets/config/yelp.py +25 -0
  33. recnexteval/datasets/datasets/__init__.py +24 -0
  34. recnexteval/datasets/datasets/amazon.py +151 -0
  35. recnexteval/datasets/datasets/base.py +250 -0
  36. recnexteval/datasets/datasets/lastfm.py +121 -0
  37. recnexteval/datasets/datasets/movielens.py +93 -0
  38. recnexteval/datasets/datasets/test.py +46 -0
  39. recnexteval/datasets/datasets/yelp.py +103 -0
  40. recnexteval/datasets/metadata/__init__.py +58 -0
  41. recnexteval/datasets/metadata/amazon.py +68 -0
  42. recnexteval/datasets/metadata/base.py +38 -0
  43. recnexteval/datasets/metadata/lastfm.py +110 -0
  44. recnexteval/datasets/metadata/movielens.py +87 -0
  45. recnexteval/evaluators/__init__.py +189 -0
  46. recnexteval/evaluators/accumulator.py +167 -0
  47. recnexteval/evaluators/base.py +216 -0
  48. recnexteval/evaluators/builder/__init__.py +125 -0
  49. recnexteval/evaluators/builder/base.py +166 -0
  50. recnexteval/evaluators/builder/pipeline.py +111 -0
  51. recnexteval/evaluators/builder/stream.py +54 -0
  52. recnexteval/evaluators/evaluator_pipeline.py +287 -0
  53. recnexteval/evaluators/evaluator_stream.py +374 -0
  54. recnexteval/evaluators/state_management.py +310 -0
  55. recnexteval/evaluators/strategy.py +32 -0
  56. recnexteval/evaluators/util.py +124 -0
  57. recnexteval/matrix/__init__.py +48 -0
  58. recnexteval/matrix/exception.py +5 -0
  59. recnexteval/matrix/interaction_matrix.py +784 -0
  60. recnexteval/matrix/prediction_matrix.py +153 -0
  61. recnexteval/matrix/util.py +24 -0
  62. recnexteval/metrics/__init__.py +57 -0
  63. recnexteval/metrics/binary/__init__.py +4 -0
  64. recnexteval/metrics/binary/hit.py +49 -0
  65. recnexteval/metrics/core/__init__.py +10 -0
  66. recnexteval/metrics/core/base.py +126 -0
  67. recnexteval/metrics/core/elementwise_top_k.py +75 -0
  68. recnexteval/metrics/core/listwise_top_k.py +72 -0
  69. recnexteval/metrics/core/top_k.py +60 -0
  70. recnexteval/metrics/core/util.py +29 -0
  71. recnexteval/metrics/ranking/__init__.py +6 -0
  72. recnexteval/metrics/ranking/dcg.py +55 -0
  73. recnexteval/metrics/ranking/ndcg.py +78 -0
  74. recnexteval/metrics/ranking/precision.py +51 -0
  75. recnexteval/metrics/ranking/recall.py +42 -0
  76. recnexteval/models/__init__.py +4 -0
  77. recnexteval/models/base.py +69 -0
  78. recnexteval/preprocessing/__init__.py +37 -0
  79. recnexteval/preprocessing/filter.py +181 -0
  80. recnexteval/preprocessing/preprocessor.py +137 -0
  81. recnexteval/registries/__init__.py +67 -0
  82. recnexteval/registries/algorithm.py +68 -0
  83. recnexteval/registries/base.py +131 -0
  84. recnexteval/registries/dataset.py +37 -0
  85. recnexteval/registries/metric.py +57 -0
  86. recnexteval/settings/__init__.py +127 -0
  87. recnexteval/settings/base.py +414 -0
  88. recnexteval/settings/exception.py +8 -0
  89. recnexteval/settings/leave_n_out_setting.py +48 -0
  90. recnexteval/settings/processor.py +115 -0
  91. recnexteval/settings/schema.py +11 -0
  92. recnexteval/settings/single_time_point_setting.py +111 -0
  93. recnexteval/settings/sliding_window_setting.py +153 -0
  94. recnexteval/settings/splitters/__init__.py +14 -0
  95. recnexteval/settings/splitters/base.py +57 -0
  96. recnexteval/settings/splitters/n_last.py +39 -0
  97. recnexteval/settings/splitters/n_last_timestamp.py +76 -0
  98. recnexteval/settings/splitters/timestamp.py +82 -0
  99. recnexteval/settings/util.py +0 -0
  100. recnexteval/utils/__init__.py +115 -0
  101. recnexteval/utils/json_to_csv_converter.py +128 -0
  102. recnexteval/utils/logging_tools.py +159 -0
  103. recnexteval/utils/path.py +155 -0
  104. recnexteval/utils/url_certificate_installer.py +54 -0
  105. recnexteval/utils/util.py +166 -0
  106. recnexteval/utils/uuid_util.py +7 -0
  107. recnexteval/utils/yaml_tool.py +65 -0
  108. recnexteval-0.1.0.dist-info/METADATA +85 -0
  109. recnexteval-0.1.0.dist-info/RECORD +110 -0
  110. recnexteval-0.1.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,32 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ from .state_management import AlgorithmStateManager
4
+
5
+
6
class EvaluationStrategy(ABC):
    """Interface for the rule that decides when evaluation moves to the next window.

    Each evaluation mode supplies its own subclass and implements
    ``should_advance_window``.
    """

    @abstractmethod
    def should_advance_window(self, algo_state_mgr: AlgorithmStateManager, current_step: int, total_steps: int) -> bool:
        """Report whether the evaluation should advance to the next window.

        Args:
            algo_state_mgr: Algorithm state manager the strategy may query.
            current_step: Step the evaluation is currently at.
            total_steps: Total number of steps in the evaluation.

        Returns:
            Whether the next window should be entered.
        """
13
+
14
+
15
class SlidingWindowStrategy(EvaluationStrategy):
    """Window-advance rule for sliding window evaluation."""

    def should_advance_window(self, algo_state_mgr: AlgorithmStateManager, current_step: int, total_steps: int) -> bool:
        """Decide whether the sliding window may move forward.

        The window advances only when ``algo_state_mgr.is_all_predicted()``
        and ``algo_state_mgr.is_all_same_data_segment()`` both hold and
        ``current_step`` is still below ``total_steps``.

        Args:
            algo_state_mgr: Algorithm state manager queried for readiness.
            current_step: Step the evaluation is currently at.
            total_steps: Total number of steps in the evaluation.

        Returns:
            Whether the next window should be entered.
        """
        # The checks stay short-circuited in their original order: the
        # data-segment check only runs once the prediction check passes.
        algorithms_in_sync = algo_state_mgr.is_all_predicted() and algo_state_mgr.is_all_same_data_segment()
        return algorithms_in_sync and current_step < total_steps
25
+
26
+
27
class SingleTimePointStrategy(EvaluationStrategy):
    """Strategy for single time point evaluation.

    Unlike ``SlidingWindowStrategy``, this strategy does not check
    ``is_all_same_data_segment()`` before advancing.
    """

    def should_advance_window(self, algo_state_mgr: AlgorithmStateManager, current_step: int, total_steps: int) -> bool:
        """Advance only when all algorithms predicted and steps remain.

        Args:
            algo_state_mgr: Algorithm state manager queried for readiness.
            current_step: Step the evaluation is currently at.
            total_steps: Total number of steps in the evaluation.

        Returns:
            Whether ``algo_state_mgr.is_all_predicted()`` holds and
            ``current_step`` is still below ``total_steps``.
        """
        return algo_state_mgr.is_all_predicted() and current_step < total_steps
@@ -0,0 +1,124 @@
1
+ import logging
2
+ from dataclasses import dataclass, field
3
+ from enum import StrEnum
4
+
5
+ from recnexteval.matrix import InteractionMatrix
6
+
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ class MetricLevelEnum(StrEnum):
12
+ MICRO = "micro"
13
+ MACRO = "macro"
14
+ WINDOW = "window"
15
+ USER = "user"
16
+
17
+ @classmethod
18
+ def has_value(cls, value: str) -> bool:
19
+ """Check valid value for MetricLevelEnum.
20
+
21
+ Args:
22
+ value: String value input.
23
+
24
+ Returns:
25
+ Whether the value is valid.
26
+ """
27
+ return value in MetricLevelEnum
28
+
29
+
30
@dataclass
class UserItemBaseStatus:
    """Bookkeeping of the known and unknown user/item ids.

    Four id sets are held: known users, unknown users, known items and
    unknown items. Properties expose the sizes and unions of these sets, and
    the methods refresh or clear them from an ``InteractionMatrix``.
    """

    # Every instance starts with its own empty sets.
    unknown_user: set[int] = field(default_factory=set)
    known_user: set[int] = field(default_factory=set)
    unknown_item: set[int] = field(default_factory=set)
    known_item: set[int] = field(default_factory=set)

    @property
    def known_shape(self) -> tuple[int, int]:
        """Shape of the user-item interaction matrix known so far.

        This is the shape of the interaction matrix released to the
        algorithm. It relies on the dataset assumption that ids increment in
        the order of time.

        Returns:
            Tuple of (|user|, |item|).
        """
        n_users = len(self.known_user)
        n_items = len(self.known_item)
        return (n_users, n_items)

    @property
    def global_shape(self) -> tuple[int, int]:
        """Shape of the user-item interaction matrix over all exposed ids.

        Counts the known ids together with the unknown ones, since an unknown
        user/item can be exposed during the prediction stage when its id is
        requested for prediction on the algorithm.

        Returns:
            Tuple of (|user|, |item|).
        """
        n_users = len(self.known_user) + len(self.unknown_user)
        n_items = len(self.known_item) + len(self.unknown_item)
        return (n_users, n_items)

    @property
    def global_user_ids(self) -> set[int]:
        """Union of the known and unknown user ids.

        Returns:
            set[int]: Set of global user ids.
        """
        known = self.known_user
        return known.union(self.unknown_user)

    @property
    def global_item_ids(self) -> set[int]:
        """Union of the known and unknown item ids.

        Returns:
            set[int]: Set of global item ids.
        """
        known = self.known_item
        return known.union(self.unknown_item)

    def update_known_user_item_base(self, data: InteractionMatrix) -> None:
        """Add the ids found in ``data`` to the known user and item sets.

        Args:
            data (InteractionMatrix): Data whose ``item_ids`` and ``user_ids``
                are merged into the known sets.
        """
        self.known_item.update(data.item_ids)
        self.known_user.update(data.user_ids)

    def update_unknown_user_item_base(self, data: InteractionMatrix) -> None:
        """Set the unknown user and item sets from ``data``.

        The unknown sets are replaced (not extended) with the ids in ``data``
        that are not currently in the matching known set.

        Args:
            data (InteractionMatrix): Data whose ``user_ids`` and ``item_ids``
                are compared against the known sets.
        """
        unseen_users = data.user_ids.difference(self.known_user)
        self.unknown_user = unseen_users
        unseen_items = data.item_ids.difference(self.known_item)
        self.unknown_item = unseen_items

    def reset_unknown_user_item_base(self) -> None:
        """Clear the unknown user and item sets.

        This method should be called after the Phase 3 when the data release
        is done.
        """
        self.unknown_user, self.unknown_item = set(), set()
@@ -0,0 +1,48 @@
1
+ """
2
+ recnexteval.matrix
3
+
4
+ This module provides classes and utilities for handling interaction matrices in recommendation systems.
5
+
6
+ The core component is the InteractionMatrix, which represents user-item interactions as a structured matrix.
7
+ It stores interaction data in a pandas DataFrame and offers methods for filtering, masking, and converting to sparse matrices.
8
+ This is essential for building and evaluating recommender algorithms, such as collaborative filtering, where interactions
9
+ between users and items need to be efficiently processed.
10
+
11
+ Use cases include:
12
+ - Preprocessing interaction data for training recommendation models.
13
+ - Handling temporal data with timestamp-based filtering (e.g., recent interactions).
14
+ - Masking unknown users/items during evaluation to prevent data leakage.
15
+ - Converting data to CSR format for efficient matrix operations in libraries like SciPy.
16
+
17
+ Classes:
18
+ InteractionMatrix: The main class for creating and manipulating interaction matrices from datasets.
19
+ It supports operations like filtering by users/items, timestamps, and shape masking.
20
+ PredictionMatrix: A subclass of InteractionMatrix tailored for prediction-related operations.
21
+ It provides masking for the expected (user, item) exposed.
22
+
23
+ Enums:
24
+ ItemUserBasedEnum: Enum for specifying whether operations are item-based or user-based.
25
+ Used in methods that group or filter data by users or items.
26
+
27
+ Exceptions:
28
+ TimestampAttributeMissingError: Raised when required timestamp attributes are missing from the data.
29
+ Ensures that time-aware operations are only performed on timestamped data.
30
+
31
+ Functions:
32
+ to_csr_matrix: Utility function to convert data structures to CSR matrix format.
33
+ Useful for creating sparse representations of interaction data for computational efficiency.
34
+ """
35
+
36
+ from .exception import TimestampAttributeMissingError
37
+ from .interaction_matrix import InteractionMatrix, ItemUserBasedEnum
38
+ from .prediction_matrix import PredictionMatrix
39
+ from .util import to_csr_matrix
40
+
41
+
42
# Names exported by ``from recnexteval.matrix import *``.
__all__ = [
    "InteractionMatrix",
    "PredictionMatrix",
    "to_csr_matrix",
    "ItemUserBasedEnum",
    "TimestampAttributeMissingError",
]
@@ -0,0 +1,5 @@
1
class TimestampAttributeMissingError(Exception):
    """Signal that a timestamp attribute is missing.

    Args:
        message: Text carried by the exception. Defaults to a message saying
            that the InteractionMatrix is missing timestamps.
    """

    def __init__(self, message: str = "InteractionMatrix is missing timestamps.") -> None:
        # Hand the text to ``Exception`` so ``str(err)`` and ``err.args``
        # reflect it.
        super().__init__(message)