recnexteval 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- recnexteval/__init__.py +20 -0
- recnexteval/algorithms/__init__.py +99 -0
- recnexteval/algorithms/base.py +377 -0
- recnexteval/algorithms/baseline/__init__.py +10 -0
- recnexteval/algorithms/baseline/decay_popularity.py +110 -0
- recnexteval/algorithms/baseline/most_popular.py +72 -0
- recnexteval/algorithms/baseline/random.py +39 -0
- recnexteval/algorithms/baseline/recent_popularity.py +34 -0
- recnexteval/algorithms/itemknn/__init__.py +14 -0
- recnexteval/algorithms/itemknn/itemknn.py +119 -0
- recnexteval/algorithms/itemknn/itemknn_incremental.py +65 -0
- recnexteval/algorithms/itemknn/itemknn_incremental_movielens.py +95 -0
- recnexteval/algorithms/itemknn/itemknn_rolling.py +17 -0
- recnexteval/algorithms/itemknn/itemknn_static.py +31 -0
- recnexteval/algorithms/time_aware_item_knn/__init__.py +11 -0
- recnexteval/algorithms/time_aware_item_knn/base.py +248 -0
- recnexteval/algorithms/time_aware_item_knn/decay_functions.py +260 -0
- recnexteval/algorithms/time_aware_item_knn/ding_2005.py +52 -0
- recnexteval/algorithms/time_aware_item_knn/liu_2010.py +65 -0
- recnexteval/algorithms/time_aware_item_knn/similarity_functions.py +106 -0
- recnexteval/algorithms/time_aware_item_knn/top_k.py +61 -0
- recnexteval/algorithms/time_aware_item_knn/utils.py +47 -0
- recnexteval/algorithms/time_aware_item_knn/vaz_2013.py +50 -0
- recnexteval/algorithms/utils.py +51 -0
- recnexteval/datasets/__init__.py +109 -0
- recnexteval/datasets/base.py +316 -0
- recnexteval/datasets/config/__init__.py +113 -0
- recnexteval/datasets/config/amazon.py +188 -0
- recnexteval/datasets/config/base.py +72 -0
- recnexteval/datasets/config/lastfm.py +105 -0
- recnexteval/datasets/config/movielens.py +169 -0
- recnexteval/datasets/config/yelp.py +25 -0
- recnexteval/datasets/datasets/__init__.py +24 -0
- recnexteval/datasets/datasets/amazon.py +151 -0
- recnexteval/datasets/datasets/base.py +250 -0
- recnexteval/datasets/datasets/lastfm.py +121 -0
- recnexteval/datasets/datasets/movielens.py +93 -0
- recnexteval/datasets/datasets/test.py +46 -0
- recnexteval/datasets/datasets/yelp.py +103 -0
- recnexteval/datasets/metadata/__init__.py +58 -0
- recnexteval/datasets/metadata/amazon.py +68 -0
- recnexteval/datasets/metadata/base.py +38 -0
- recnexteval/datasets/metadata/lastfm.py +110 -0
- recnexteval/datasets/metadata/movielens.py +87 -0
- recnexteval/evaluators/__init__.py +189 -0
- recnexteval/evaluators/accumulator.py +167 -0
- recnexteval/evaluators/base.py +216 -0
- recnexteval/evaluators/builder/__init__.py +125 -0
- recnexteval/evaluators/builder/base.py +166 -0
- recnexteval/evaluators/builder/pipeline.py +111 -0
- recnexteval/evaluators/builder/stream.py +54 -0
- recnexteval/evaluators/evaluator_pipeline.py +287 -0
- recnexteval/evaluators/evaluator_stream.py +374 -0
- recnexteval/evaluators/state_management.py +310 -0
- recnexteval/evaluators/strategy.py +32 -0
- recnexteval/evaluators/util.py +124 -0
- recnexteval/matrix/__init__.py +48 -0
- recnexteval/matrix/exception.py +5 -0
- recnexteval/matrix/interaction_matrix.py +784 -0
- recnexteval/matrix/prediction_matrix.py +153 -0
- recnexteval/matrix/util.py +24 -0
- recnexteval/metrics/__init__.py +57 -0
- recnexteval/metrics/binary/__init__.py +4 -0
- recnexteval/metrics/binary/hit.py +49 -0
- recnexteval/metrics/core/__init__.py +10 -0
- recnexteval/metrics/core/base.py +126 -0
- recnexteval/metrics/core/elementwise_top_k.py +75 -0
- recnexteval/metrics/core/listwise_top_k.py +72 -0
- recnexteval/metrics/core/top_k.py +60 -0
- recnexteval/metrics/core/util.py +29 -0
- recnexteval/metrics/ranking/__init__.py +6 -0
- recnexteval/metrics/ranking/dcg.py +55 -0
- recnexteval/metrics/ranking/ndcg.py +78 -0
- recnexteval/metrics/ranking/precision.py +51 -0
- recnexteval/metrics/ranking/recall.py +42 -0
- recnexteval/models/__init__.py +4 -0
- recnexteval/models/base.py +69 -0
- recnexteval/preprocessing/__init__.py +37 -0
- recnexteval/preprocessing/filter.py +181 -0
- recnexteval/preprocessing/preprocessor.py +137 -0
- recnexteval/registries/__init__.py +67 -0
- recnexteval/registries/algorithm.py +68 -0
- recnexteval/registries/base.py +131 -0
- recnexteval/registries/dataset.py +37 -0
- recnexteval/registries/metric.py +57 -0
- recnexteval/settings/__init__.py +127 -0
- recnexteval/settings/base.py +414 -0
- recnexteval/settings/exception.py +8 -0
- recnexteval/settings/leave_n_out_setting.py +48 -0
- recnexteval/settings/processor.py +115 -0
- recnexteval/settings/schema.py +11 -0
- recnexteval/settings/single_time_point_setting.py +111 -0
- recnexteval/settings/sliding_window_setting.py +153 -0
- recnexteval/settings/splitters/__init__.py +14 -0
- recnexteval/settings/splitters/base.py +57 -0
- recnexteval/settings/splitters/n_last.py +39 -0
- recnexteval/settings/splitters/n_last_timestamp.py +76 -0
- recnexteval/settings/splitters/timestamp.py +82 -0
- recnexteval/settings/util.py +0 -0
- recnexteval/utils/__init__.py +115 -0
- recnexteval/utils/json_to_csv_converter.py +128 -0
- recnexteval/utils/logging_tools.py +159 -0
- recnexteval/utils/path.py +155 -0
- recnexteval/utils/url_certificate_installer.py +54 -0
- recnexteval/utils/util.py +166 -0
- recnexteval/utils/uuid_util.py +7 -0
- recnexteval/utils/yaml_tool.py +65 -0
- recnexteval-0.1.0.dist-info/METADATA +85 -0
- recnexteval-0.1.0.dist-info/RECORD +110 -0
- recnexteval-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Registries for algorithms, metrics, and datasets.
|
|
2
|
+
|
|
3
|
+
This module provides registries for storing and managing algorithms, metrics,
|
|
4
|
+
and datasets used in experiments. Registries help keep track of valid classes
|
|
5
|
+
and enable easy instantiation of components.
|
|
6
|
+
|
|
7
|
+
## Registries
|
|
8
|
+
|
|
9
|
+
Registries store algorithms, metrics, and datasets by default and allow
|
|
10
|
+
registration of new components via the `register` function.
|
|
11
|
+
|
|
12
|
+
Example:
|
|
13
|
+
```python
|
|
14
|
+
from recnexteval.registries import ALGORITHM_REGISTRY
|
|
15
|
+
from recnexteval.algorithms import ItemKNNStatic
|
|
16
|
+
|
|
17
|
+
algo = ALGORITHM_REGISTRY.get("ItemKNNStatic")(K=10)
|
|
18
|
+
ALGORITHM_REGISTRY.register("algo_1", ItemKNNStatic)
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Available Registries
|
|
22
|
+
|
|
23
|
+
- `ALGORITHM_REGISTRY`: Registry for algorithms
|
|
24
|
+
- `DATASET_REGISTRY`: Registry for datasets
|
|
25
|
+
- `METRIC_REGISTRY`: Registry for metrics
|
|
26
|
+
- `AlgorithmRegistry`: Class for creating algorithm registries
|
|
27
|
+
- `DatasetRegistry`: Class for creating dataset registries
|
|
28
|
+
- `MetricRegistry`: Class for creating metric registries
|
|
29
|
+
|
|
30
|
+
## Entries
|
|
31
|
+
|
|
32
|
+
Entries store algorithms and metrics in registries. They maintain the class
|
|
33
|
+
and parameters used to instantiate each component. These entries are used by
|
|
34
|
+
`EvaluatorPipeline` to instantiate algorithms and metrics.
|
|
35
|
+
|
|
36
|
+
### Available Entries
|
|
37
|
+
|
|
38
|
+
- `AlgorithmEntry`: Entry for algorithms
|
|
39
|
+
- `MetricEntry`: Entry for metrics
|
|
40
|
+
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
from .algorithm import (
|
|
44
|
+
ALGORITHM_REGISTRY,
|
|
45
|
+
AlgorithmEntry,
|
|
46
|
+
AlgorithmRegistry,
|
|
47
|
+
)
|
|
48
|
+
from .base import Registry
|
|
49
|
+
from .dataset import DATASET_REGISTRY, DatasetRegistry
|
|
50
|
+
from .metric import (
|
|
51
|
+
METRIC_REGISTRY,
|
|
52
|
+
MetricEntry,
|
|
53
|
+
MetricRegistry,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
__all__ = [
|
|
58
|
+
"ALGORITHM_REGISTRY",
|
|
59
|
+
"AlgorithmEntry",
|
|
60
|
+
"AlgorithmRegistry",
|
|
61
|
+
"DATASET_REGISTRY",
|
|
62
|
+
"DatasetRegistry",
|
|
63
|
+
"METRIC_REGISTRY",
|
|
64
|
+
"MetricEntry",
|
|
65
|
+
"MetricRegistry",
|
|
66
|
+
"Registry",
|
|
67
|
+
]
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import logging
|
|
3
|
+
import uuid
|
|
4
|
+
from typing import Any, NamedTuple
|
|
5
|
+
|
|
6
|
+
from ..algorithms import Algorithm
|
|
7
|
+
from .base import Registry
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AlgorithmRegistry(Registry[Algorithm]):
    """Name-to-class lookup table for recommendation algorithms.

    On construction the registry is filled with every class exported through
    ``recnexteval.algorithms.__all__``, so the built-in algorithms can be
    fetched by name without any extra registration step.
    """

    def __init__(self) -> None:
        """Build the registry from the `recnexteval.algorithms` package.

        The package is resolved by its dotted name and handed to the base
        `Registry`, which registers the classes it exports.
        """
        super().__init__(importlib.import_module("recnexteval.algorithms"))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
ALGORITHM_REGISTRY = AlgorithmRegistry()
|
|
30
|
+
"""Registry instantiation for algorithms.
|
|
31
|
+
|
|
32
|
+
Contains the recnexteval algorithms by default and allows registration of
|
|
33
|
+
new algorithms via the `register` function.
|
|
34
|
+
|
|
35
|
+
Examples:
|
|
36
|
+
```python
|
|
37
|
+
from recnexteval.registries import ALGORITHM_REGISTRY
|
|
38
|
+
|
|
39
|
+
# Construct an ItemKNN object with parameter K=20
|
|
40
|
+
algo = ALGORITHM_REGISTRY.get("ItemKNN")(K=20)
|
|
41
|
+
|
|
42
|
+
from recnexteval.algorithms import ItemKNN
|
|
43
|
+
|
|
44
|
+
ALGORITHM_REGISTRY.register("HelloWorld", ItemKNN)
|
|
45
|
+
|
|
46
|
+
# Also construct an ItemKNN object with parameter K=20
|
|
47
|
+
algo = ALGORITHM_REGISTRY.get("HelloWorld")(K=20)
|
|
48
|
+
```
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class AlgorithmEntry(NamedTuple):
|
|
53
|
+
"""Entry for the algorithm registry.
|
|
54
|
+
|
|
55
|
+
The intended use of this class is to store the name of the algorithm and
|
|
56
|
+
the parameters that the algorithm should take. Mainly this is used during
|
|
57
|
+
the building phase of the evaluator pipeline in `Builder`.
|
|
58
|
+
|
|
59
|
+
Attributes:
|
|
60
|
+
name: Name of the algorithm.
|
|
61
|
+
params: Parameters that do not require optimization as key-value
|
|
62
|
+
pairs, where the key is the hyperparameter name and the value is
|
|
63
|
+
the value it should take.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
name: str
|
|
67
|
+
uuid: uuid.UUID | None = None
|
|
68
|
+
params: None | dict[str, Any] = None
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
import logging
|
|
3
|
+
from types import ModuleType
|
|
4
|
+
from typing import Generic, TypeVar
|
|
5
|
+
|
|
6
|
+
from ..models import BaseModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
T = TypeVar('T', bound=BaseModel)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Registry(Generic[T], BaseModel):
    """A Registry is a wrapper for a dictionary that maps names to Python types.

    Most often, this is used to map names to classes. The registry is seeded
    from a source module: every class listed in that module's ``__all__`` is
    registered under its own name.
    """

    def __init__(self, src: ModuleType) -> None:
        """Create the registry and register the classes exported by `src`.

        Args:
            src: Module whose ``__all__`` lists the names to pre-register.

        Raises:
            AttributeError: If `src` has no ``__all__`` or it is ``None``.
        """
        # NOTE(review): BaseModel.__init__ is not invoked here — confirm that
        # this is intentional.
        self.registered: dict[str, type[T]] = {}
        self.src = src
        self._register_all_src()

    def _register_all_src(self) -> None:
        """Register all classes from the src module.

        Names in ``__all__`` that cannot be resolved on the module, or that
        resolve to something other than a class, are skipped.

        Raises:
            AttributeError: If the source module has no ``__all__`` attribute
                or its ``__all__`` is ``None``.
            KeyError: If a name appears more than once in ``__all__``.
        """
        if not hasattr(self.src, "__all__"):
            raise AttributeError(f"Source module {self.src} has no __all__ attribute")
        if self.src.__all__ is None:
            raise AttributeError(f"Source module {self.src} has __all__ set to None")
        for class_name in self.src.__all__:
            # Only the attribute lookup is guarded, so an AttributeError
            # raised while registering is not silently swallowed.
            try:
                cls = getattr(self.src, class_name)
            except AttributeError:
                # Skip if the attribute doesn't exist
                continue
            if inspect.isclass(cls):
                self.register(class_name, cls)

    def __getitem__(self, key: str) -> type[T]:
        """Retrieve the type for the given key.

        Args:
            key: The key of the type to fetch.

        Returns:
            The class type associated with the key.

        Raises:
            KeyError: If the key is not found in the registry.
        """
        try:
            return self.registered[key]
        except KeyError:
            # Suppress the chained dict KeyError; the message below already
            # carries everything the caller needs.
            raise KeyError(f"key `{key}` not found in registry") from None

    def __contains__(self, key: str) -> bool:
        """Check if the given key is known to the registry.

        Args:
            key: The key to check.

        Returns:
            True if the key is known, False otherwise.
        """
        return key in self.registered

    def get(self, key: str) -> type[T]:
        """Retrieve the value for this key.

        This value is a Python type, most often a class.

        Args:
            key: The key to fetch.

        Returns:
            The class type associated with the key.

        Raises:
            KeyError: If the key is not found in the registry.
        """
        return self[key]

    def register(self, key: str, cls: type[T]) -> None:
        """Register a new Python type (most often a class).

        After registration, the key can be used to fetch the Python type from the registry.

        Args:
            key: Key to register the type at. Needs to be unique to the registry.
            cls: Class to register.

        Raises:
            KeyError: If the key is already registered.
        """
        if key in self.registered:
            raise KeyError(f"key `{key}` already registered")
        self.registered[key] = cls

    def get_registered_keys(self, include_base: bool = False) -> list[str]:
        """Get a list of registered keys.

        Args:
            include_base: When True, return every registered key. When False
                (the default), return only keys whose class has a falsy
                ``IS_BASE`` attribute.

        Returns:
            The registered keys, in registration order.
        """
        if include_base:
            return list(self.registered)
        # A class without an ``IS_BASE`` attribute is treated as a base class
        # (default True) and is therefore left out of the filtered listing.
        return [key for key, cls in self.registered.items() if not getattr(cls, "IS_BASE", True)]

    def registered_values(self) -> list[type[T]]:
        """Get a list of the registered non-base types.

        Returns:
            The types for every key reported by
            ``get_registered_keys(include_base=False)``.
        """
        return [self.registered[key] for key in self.get_registered_keys(include_base=False)]

    def registered_items(self) -> list[tuple[str, type[T]]]:
        """Get a list of the registered non-base key-type pairs.

        Returns:
            ``(key, type)`` pairs for every key reported by
            ``get_registered_keys(include_base=False)``.
        """
        return [(key, self.registered[key]) for key in self.get_registered_keys(include_base=False)]
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
|
|
3
|
+
from ..datasets import Dataset
|
|
4
|
+
from .base import Registry
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DatasetRegistry(Registry[Dataset]):
    """Name-to-class lookup table for datasets.

    On construction the registry is filled with every class exported through
    ``recnexteval.datasets.__all__``.
    """

    def __init__(self) -> None:
        """Build the registry from the `recnexteval.datasets` package."""
        super().__init__(importlib.import_module('recnexteval.datasets'))
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
DATASET_REGISTRY = DatasetRegistry()
|
|
19
|
+
"""Registry for datasets.
|
|
20
|
+
|
|
21
|
+
Contains the recnexteval datasets by default,
|
|
22
|
+
and allows registration of new datasets via the `register` function.
|
|
23
|
+
|
|
24
|
+
Example:
|
|
25
|
+
```python
|
|
26
|
+
from recnexteval.registries import DATASET_REGISTRY
|
|
27
|
+
|
|
28
|
+
# Construct an AmazonMovieDataset object
|
|
29
|
+
dataset = DATASET_REGISTRY.get('AmazonMovieDataset')(use_default_filters=False)
|
|
30
|
+
|
|
31
|
+
from recnexteval.datasets import AmazonMovieDataset
|
|
32
|
+
DATASET_REGISTRY.register('HelloWorld', AmazonMovieDataset)
|
|
33
|
+
|
|
34
|
+
# Also construct an AmazonMovieDataset object
|
|
35
|
+
dataset = DATASET_REGISTRY.get('HelloWorld')(use_default_filters=False)
|
|
36
|
+
```
|
|
37
|
+
"""
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
from typing import NamedTuple
|
|
3
|
+
|
|
4
|
+
from ..metrics import Metric
|
|
5
|
+
from .base import Registry
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MetricRegistry(Registry[Metric]):
    """Name-to-class lookup table for metrics.

    On construction the registry is filled with every class exported through
    ``recnexteval.metrics.__all__``.
    """

    def __init__(self) -> None:
        """Build the registry from the `recnexteval.metrics` package."""
        super().__init__(importlib.import_module('recnexteval.metrics'))
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
METRIC_REGISTRY = MetricRegistry()
|
|
20
|
+
"""Registry for metrics.
|
|
21
|
+
|
|
22
|
+
Contains the recnexteval metrics by default and allows registration of new
|
|
23
|
+
metrics via the ``register`` function.
|
|
24
|
+
|
|
25
|
+
Example:
|
|
26
|
+
```python
|
|
27
|
+
from recnexteval.registries import METRIC_REGISTRY
|
|
28
|
+
|
|
29
|
+
# Construct a Recall object with parameter K=20
|
|
30
|
+
algo = METRIC_REGISTRY.get("Recall")(K=20)
|
|
31
|
+
|
|
32
|
+
from recnexteval.metrics import Recall
|
|
33
|
+
|
|
34
|
+
METRIC_REGISTRY.register("HelloWorld", Recall)
|
|
35
|
+
|
|
36
|
+
# Also construct a Recall object with parameter K=20
|
|
37
|
+
algo = METRIC_REGISTRY.get("HelloWorld")(K=20)
|
|
38
|
+
```
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class MetricEntry(NamedTuple):
|
|
43
|
+
"""Entry for the metric registry.
|
|
44
|
+
|
|
45
|
+
The intended use of this class is to store the name of the metric and the
|
|
46
|
+
top-K value for the metric specified by the user.
|
|
47
|
+
|
|
48
|
+
Mainly this will happen during the building phase of the evaluator
|
|
49
|
+
pipeline in :class:`Builder`.
|
|
50
|
+
|
|
51
|
+
Attributes:
|
|
52
|
+
name: Name of the algorithm.
|
|
53
|
+
K: Top-K value for the metric.
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
name: str
|
|
57
|
+
K: None | int = None
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Settings module for data splitting strategies.
|
|
2
|
+
|
|
3
|
+
The setting module contains classes that define how the data is split. To
|
|
4
|
+
generalize the splitting of the data, the interactions are first sorted in
|
|
5
|
+
temporal order, and then the split is performed based on the setting. As this
|
|
6
|
+
library only considers datasets with timestamps, we will consider the case of the
|
|
7
|
+
single time point setting and the sliding window setting. The single time point
|
|
8
|
+
setting is analogous to Setting 3 of [@sun2023]. The sliding window setting
|
|
9
|
+
is analogous to Setting 1 of [@sun2023].
|
|
10
|
+
|
|
11
|
+

|
|
12
|
+
|
|
13
|
+
The diagram below shows the data split for Setting 1. The
|
|
14
|
+
unlabeled data will contain masked interactions that occur after the
|
|
15
|
+
current timestamp. The ground truth data will contain the actual interactions
|
|
16
|
+
which will be used for evaluation and then released to the algorithm.
|
|
17
|
+
|
|
18
|
+

|
|
19
|
+
|
|
20
|
+
While this setting allows us to test the algorithm in a real-world scenario,
|
|
21
|
+
there are times when the algorithm might require some sequential data before
|
|
22
|
+
a prediction can be made. While it is not the role of the evaluating platform
|
|
23
|
+
to provide this data, we have included the option to provide the last n interactions.
|
|
24
|
+
|
|
25
|
+
## Data Components
|
|
26
|
+
|
|
27
|
+
Each split produces three data components:
|
|
28
|
+
|
|
29
|
+
- **background_data**: Data that is used to train the algorithm before the first
|
|
30
|
+
split.
|
|
31
|
+
|
|
32
|
+
- **unlabeled_data**: Data that is released to the algorithm for prediction.
|
|
33
|
+
Contains the ID to be predicted and is labeled with "-1". Timestamps of the
|
|
34
|
+
interactions to be predicted are preserved. Can contain the last n interactions
|
|
35
|
+
split if specified in the parameter. The purpose is to provide sequential
|
|
36
|
+
data to the algorithm.
|
|
37
|
+
|
|
38
|
+
- **ground_truth_data**: Data that is used to evaluate the algorithm. This data
|
|
39
|
+
will contain the actual interactions. The unlabeled data with the masked data
|
|
40
|
+
is a subset of the ground truth to ensure that there is an actual corresponding
|
|
41
|
+
value to evaluate the prediction against.
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
## Available Settings
|
|
45
|
+
|
|
46
|
+
- `Setting`: Base class for data splitting settings
|
|
47
|
+
- `SingleTimePointSetting`: Single time point splitting strategy
|
|
48
|
+
- `SlidingWindowSetting`: Sliding window splitting strategy
|
|
49
|
+
- `LeaveNOutSetting`: Leave-N-out cross-validation strategy
|
|
50
|
+
|
|
51
|
+
## Usage
|
|
52
|
+
|
|
53
|
+
Settings are stateful. Thus, the initialization of the setting object only stores
|
|
54
|
+
the parameters that are passed. Calling of `Setting.split` is necessary
|
|
55
|
+
such that the attributes `Setting.background_data`, `Setting.unlabeled_data`
|
|
56
|
+
and `Setting.ground_truth_data` are populated.
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from recnexteval.datasets import AmazonMovieDataset
|
|
60
|
+
from recnexteval.settings import SlidingWindowSetting
|
|
61
|
+
|
|
62
|
+
dataset = AmazonMovieDataset(use_default_filters=False)
|
|
63
|
+
data = dataset.load()
|
|
64
|
+
|
|
65
|
+
setting = SlidingWindowSetting(
|
|
66
|
+
background_t=1530000000,
|
|
67
|
+
window_size=60 * 60 * 24 * 30, # 30 days
|
|
68
|
+
n_seq_data=1,
|
|
69
|
+
top_K=10
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
setting.split(data)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Splitters
|
|
76
|
+
|
|
77
|
+
Splitters are stateless utilities that split data into past and future interactions.
|
|
78
|
+
They abstract splitting logic from the setting architecture, enabling flexible
|
|
79
|
+
implementations.
|
|
80
|
+
|
|
81
|
+
### Available Splitters
|
|
82
|
+
|
|
83
|
+
- `TimestampSplitter`: Split data by timestamp
|
|
84
|
+
- `NLastInteractionTimestampSplitter`: Split using N past interactions and timestamp
|
|
85
|
+
- `NLastInteractionSplitter`: Split using N last interactions
|
|
86
|
+
|
|
87
|
+
## Processors
|
|
88
|
+
|
|
89
|
+
Processors handle data transformation. The current implementation masks prediction
|
|
90
|
+
data and injects it into unlabeled data. Custom processors can implement additional
|
|
91
|
+
transformations.
|
|
92
|
+
|
|
93
|
+
### Available Processors
|
|
94
|
+
|
|
95
|
+
- `Processor`: Base class for data processors
|
|
96
|
+
- `PredictionDataProcessor`: Masks and injects prediction data
|
|
97
|
+
|
|
98
|
+
## Exceptions
|
|
99
|
+
|
|
100
|
+
- `EOWSettingError`: Raised when end of window is reached
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
from .base import Setting
|
|
104
|
+
from .exception import EOWSettingError
|
|
105
|
+
from .leave_n_out_setting import LeaveNOutSetting
|
|
106
|
+
from .processor import PredictionDataProcessor, Processor
|
|
107
|
+
from .single_time_point_setting import SingleTimePointSetting
|
|
108
|
+
from .sliding_window_setting import SlidingWindowSetting
|
|
109
|
+
from .splitters import (
|
|
110
|
+
NLastInteractionSplitter,
|
|
111
|
+
NLastInteractionTimestampSplitter,
|
|
112
|
+
TimestampSplitter,
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
__all__ = [
|
|
117
|
+
"Setting",
|
|
118
|
+
"SingleTimePointSetting",
|
|
119
|
+
"SlidingWindowSetting",
|
|
120
|
+
"LeaveNOutSetting",
|
|
121
|
+
"Processor",
|
|
122
|
+
"PredictionDataProcessor",
|
|
123
|
+
"TimestampSplitter",
|
|
124
|
+
"NLastInteractionTimestampSplitter",
|
|
125
|
+
"NLastInteractionSplitter",
|
|
126
|
+
"EOWSettingError",
|
|
127
|
+
]
|