haystack-ml-stack 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. haystack_ml_stack-0.3.1/PKG-INFO +99 -0
  2. haystack_ml_stack-0.3.1/README.md +81 -0
  3. haystack_ml_stack-0.3.1/pyproject.toml +27 -0
  4. haystack_ml_stack-0.3.1/setup.cfg +4 -0
  5. haystack_ml_stack-0.3.1/src/haystack_ml_stack/__init__.py +14 -0
  6. haystack_ml_stack-0.3.1/src/haystack_ml_stack/_serializers.py +368 -0
  7. haystack_ml_stack-0.3.1/src/haystack_ml_stack/app.py +298 -0
  8. haystack_ml_stack-0.3.1/src/haystack_ml_stack/cache.py +19 -0
  9. haystack_ml_stack-0.3.1/src/haystack_ml_stack/dynamo.py +326 -0
  10. haystack_ml_stack-0.3.1/src/haystack_ml_stack/exceptions.py +5 -0
  11. haystack_ml_stack-0.3.1/src/haystack_ml_stack/generated/__init__.py +0 -0
  12. haystack_ml_stack-0.3.1/src/haystack_ml_stack/generated/v1/__init__.py +0 -0
  13. haystack_ml_stack-0.3.1/src/haystack_ml_stack/generated/v1/features_pb2.py +70 -0
  14. haystack_ml_stack-0.3.1/src/haystack_ml_stack/generated/v1/features_pb2.pyi +136 -0
  15. haystack_ml_stack-0.3.1/src/haystack_ml_stack/model_store.py +38 -0
  16. haystack_ml_stack-0.3.1/src/haystack_ml_stack/settings.py +23 -0
  17. haystack_ml_stack-0.3.1/src/haystack_ml_stack/utils.py +675 -0
  18. haystack_ml_stack-0.3.1/src/haystack_ml_stack.egg-info/PKG-INFO +99 -0
  19. haystack_ml_stack-0.3.1/src/haystack_ml_stack.egg-info/SOURCES.txt +22 -0
  20. haystack_ml_stack-0.3.1/src/haystack_ml_stack.egg-info/dependency_links.txt +1 -0
  21. haystack_ml_stack-0.3.1/src/haystack_ml_stack.egg-info/requires.txt +10 -0
  22. haystack_ml_stack-0.3.1/src/haystack_ml_stack.egg-info/top_level.txt +1 -0
  23. haystack_ml_stack-0.3.1/tests/test_serializers.py +152 -0
  24. haystack_ml_stack-0.3.1/tests/test_utils.py +510 -0
@@ -0,0 +1,99 @@
1
+ Metadata-Version: 2.4
2
+ Name: haystack-ml-stack
3
+ Version: 0.3.1
4
+ Summary: Functions related to Haystack ML
5
+ Author-email: Oscar Vega <oscar@haystack.tv>
6
+ License: MIT
7
+ Requires-Python: >=3.11
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: protobuf==6.33.2
10
+ Provides-Extra: server
11
+ Requires-Dist: pydantic==2.5.0; extra == "server"
12
+ Requires-Dist: cachetools==5.5.2; extra == "server"
13
+ Requires-Dist: cloudpickle==2.2.1; extra == "server"
14
+ Requires-Dist: aioboto3==12.0.0; extra == "server"
15
+ Requires-Dist: fastapi==0.104.1; extra == "server"
16
+ Requires-Dist: pydantic-settings==2.2; extra == "server"
17
+ Requires-Dist: newrelic==11.1.0; extra == "server"
18
+
19
+ # Haystack ML Stack
20
+
21
+ Currently this project contains a FastAPI-based service designed for low-latency scoring of stream data received over HTTP requests.
22
+
23
+ ## 🚀 Features
24
+
25
+ * **FastAPI Service:** Lightweight and fast web service for ML inference.
26
+ * **Asynchronous I/O:** Utilizes `aiobotocore` for non-blocking S3 and DynamoDB operations.
27
+ * **Model Loading:** Downloads and loads the ML model (using `cloudpickle`) from a configurable S3 path on startup.
28
+ * **Feature Caching:** Implements a thread-safe Time-To-Live (TTL) / Least-Recently-Used (LRU) cache (`cachetools.TLRUCache`) for DynamoDB features, reducing latency and database load.
29
+ * **DynamoDB Integration:** Fetches stream-specific features from DynamoDB to enrich the data before scoring.
30
+ * **Health Check:** Provides a `/health` endpoint to monitor service status and model loading.
31
+
32
+ ## 📦 Installation
33
+
34
+ This project requires Python 3.11 or later.
35
+
36
+ 1. **Install package:**
37
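+ Dependencies are listed in `pyproject.toml`. The packages used by the FastAPI service (`fastapi`, `aioboto3`, `cachetools`, `cloudpickle`, `pydantic`, `pydantic-settings`, `newrelic`) are declared under the optional `server` extra, so install `haystack-ml-stack[server]` to run the service.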
38
+
39
+ ```bash
40
+ pip install haystack-ml-stack
41
+ ```
42
+
43
+ ## ⚙️ Configuration
44
+
45
+ The service is configured using environment variables, managed by `pydantic-settings`. You can use a `.env` file for local development.
46
+
47
+ | Variable Name | Alias | Default | Description |
48
+ | :--- | :--- | :--- | :--- |
49
+ | `S3_MODEL_PATH` | `S3_MODEL_PATH` | `None` | **Required.** The `s3://bucket/key` URL for the cloudpickled ML model file. |
50
+ | `FEATURES_TABLE`| `FEATURES_TABLE`| `"features"` | Name of the DynamoDB table storing stream features. |
51
+ | `LOGS_FRACTION` | `LOGS_FRACTION` | `0.01` | Fraction of requests to log detailed stream data for sampling/debugging (0.0 to 1.0). |
52
+ | `CACHE_MAXSIZE` | *(none)* | `50000` | Maximum size of the in-memory feature cache. |
53
+
54
+ **Example env vars**
55
+
56
+ ```env
57
+ S3_MODEL_PATH="s3://my-ml-models/stream-scorer/latest.pkl"
58
+ FEATURES_TABLE="features"
59
+ LOGS_FRACTION=0.05
60
+ ```
61
+
62
+ ## 🌐 Endpoints
63
+ | Method | Path | Description |
64
+ | :--- | :--- | :--- |
65
+ | **GET** | `/` | Root endpoint, returns a simple running message. |
66
+ | **GET** | `/health` | Checks if the service is running and if the ML model has been loaded. |
67
+ | **POST** | `/score` | **Main scoring endpoint.** Accepts stream data and returns model predictions. |
68
+
69
+ ## 💻 Technical Details
70
+
71
+ ### Model Structure
72
+ The ML model file downloaded from S3 is expected to be a cloudpickle-serialized Python dictionary with the following structure:
73
+
74
+ ``` python
75
+
76
+ model = {
77
+ "preprocess": <function>, # Function to transform request data into model input.
78
+ "predict": <function>, # Function to perform the actual model inference.
79
+ "params": <dict/any>, # Optional parameters passed to preprocess/predict.
80
+ "stream_features": <list[str]>, # Optional list of feature names to fetch from DynamoDB.
81
+ }
82
+ ```
83
+
84
+ ### Feature Caching (cache.py)
85
+ The `ThreadSafeTLRUCache` ensures that feature lookups and updates are thread-safe.
86
+ The `_ttu` (time-to-use) policy allows features to specify their own TTL via a `cache_ttl_in_seconds` key in the stored value.
87
+
88
+ ### DynamoDB Feature Fetching (dynamo.py)
89
+ The `set_stream_features` function handles:
90
+
91
+ - Checking the in-memory cache for required `stream_features`.
92
+
93
+ - Batch-fetching any missing features from DynamoDB.
94
+
95
+ - Parsing the low-level DynamoDB items into Python types.
96
+
97
+ - Populating the cache with the fetched data, respecting the feature's TTL.
98
+
99
+ - Injecting the fetched feature values back into the streams list in the request payload.
@@ -0,0 +1,81 @@
1
+ # Haystack ML Stack
2
+
3
+ Currently this project contains a FastAPI-based service designed for low-latency scoring of stream data received over HTTP requests.
4
+
5
+ ## 🚀 Features
6
+
7
+ * **FastAPI Service:** Lightweight and fast web service for ML inference.
8
+ * **Asynchronous I/O:** Utilizes `aiobotocore` for non-blocking S3 and DynamoDB operations.
9
+ * **Model Loading:** Downloads and loads the ML model (using `cloudpickle`) from a configurable S3 path on startup.
10
+ * **Feature Caching:** Implements a thread-safe Time-To-Live (TTL) / Least-Recently-Used (LRU) cache (`cachetools.TLRUCache`) for DynamoDB features, reducing latency and database load.
11
+ * **DynamoDB Integration:** Fetches stream-specific features from DynamoDB to enrich the data before scoring.
12
+ * **Health Check:** Provides a `/health` endpoint to monitor service status and model loading.
13
+
14
+ ## 📦 Installation
15
+
16
+ This project requires Python 3.11 or later.
17
+
18
+ 1. **Install package:**
19
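+ Dependencies are listed in `pyproject.toml`. The packages used by the FastAPI service (`fastapi`, `aioboto3`, `cachetools`, `cloudpickle`, `pydantic`, `pydantic-settings`, `newrelic`) are declared under the optional `server` extra, so install `haystack-ml-stack[server]` to run the service.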
20
+
21
+ ```bash
22
+ pip install haystack-ml-stack
23
+ ```
24
+
25
+ ## ⚙️ Configuration
26
+
27
+ The service is configured using environment variables, managed by `pydantic-settings`. You can use a `.env` file for local development.
28
+
29
+ | Variable Name | Alias | Default | Description |
30
+ | :--- | :--- | :--- | :--- |
31
+ | `S3_MODEL_PATH` | `S3_MODEL_PATH` | `None` | **Required.** The `s3://bucket/key` URL for the cloudpickled ML model file. |
32
+ | `FEATURES_TABLE`| `FEATURES_TABLE`| `"features"` | Name of the DynamoDB table storing stream features. |
33
+ | `LOGS_FRACTION` | `LOGS_FRACTION` | `0.01` | Fraction of requests to log detailed stream data for sampling/debugging (0.0 to 1.0). |
34
+ | `CACHE_MAXSIZE` | *(none)* | `50000` | Maximum size of the in-memory feature cache. |
35
+
36
+ **Example env vars**
37
+
38
+ ```env
39
+ S3_MODEL_PATH="s3://my-ml-models/stream-scorer/latest.pkl"
40
+ FEATURES_TABLE="features"
41
+ LOGS_FRACTION=0.05
42
+ ```
43
+
44
+ ## 🌐 Endpoints
45
+ | Method | Path | Description |
46
+ | :--- | :--- | :--- |
47
+ | **GET** | `/` | Root endpoint, returns a simple running message. |
48
+ | **GET** | `/health` | Checks if the service is running and if the ML model has been loaded. |
49
+ | **POST** | `/score` | **Main scoring endpoint.** Accepts stream data and returns model predictions. |
50
+
51
+ ## 💻 Technical Details
52
+
53
+ ### Model Structure
54
+ The ML model file downloaded from S3 is expected to be a cloudpickle-serialized Python dictionary with the following structure:
55
+
56
+ ``` python
57
+
58
+ model = {
59
+ "preprocess": <function>, # Function to transform request data into model input.
60
+ "predict": <function>, # Function to perform the actual model inference.
61
+ "params": <dict/any>, # Optional parameters passed to preprocess/predict.
62
+ "stream_features": <list[str]>, # Optional list of feature names to fetch from DynamoDB.
63
+ }
64
+ ```
65
+
66
+ ### Feature Caching (cache.py)
67
+ The `ThreadSafeTLRUCache` ensures that feature lookups and updates are thread-safe.
68
+ The `_ttu` (time-to-use) policy allows features to specify their own TTL via a `cache_ttl_in_seconds` key in the stored value.
69
+
70
+ ### DynamoDB Feature Fetching (dynamo.py)
71
+ The `set_stream_features` function handles:
72
+
73
+ - Checking the in-memory cache for required `stream_features`.
74
+
75
+ - Batch-fetching any missing features from DynamoDB.
76
+
77
+ - Parsing the low-level DynamoDB items into Python types.
78
+
79
+ - Populating the cache with the fetched data, respecting the feature's TTL.
80
+
81
+ - Injecting the fetched feature values back into the streams list in the request payload.
@@ -0,0 +1,27 @@
1
+ # pyproject.toml
2
+ [build-system]
3
+ requires = ["setuptools>=69", "wheel", "build"]
4
+ build-backend = "setuptools.build_meta"
5
+
6
+ [project]
7
+ name = "haystack-ml-stack"
8
+ version = "0.3.1"
9
+ description = "Functions related to Haystack ML"
10
+ readme = "README.md"
11
+ authors = [{ name = "Oscar Vega", email = "oscar@haystack.tv" }]
12
+ requires-python = ">=3.11"
13
+ dependencies = [
14
+ "protobuf==6.33.2",
15
+ ]
16
+ license = { text = "MIT" }
17
+
18
+ [project.optional-dependencies]
19
+ server = [
20
+ "pydantic==2.5.0",
21
+ "cachetools==5.5.2",
22
+ "cloudpickle==2.2.1",
23
+ "aioboto3==12.0.0",
24
+ "fastapi==0.104.1",
25
+ "pydantic-settings==2.2",
26
+ "newrelic==11.1.0",
27
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,14 @@
1
# Public names of the package. The serializer registry is always exported;
# ``create_app`` is exported only when ``.app`` can be imported.
__all__ = []

try:
    from .app import create_app
except ImportError:
    # Deliberate best-effort: ``.app`` failed to import (presumably because
    # the optional ``server`` dependencies are not installed -- confirm), so
    # the package exposes only the serializers.
    pass
else:
    __all__ = ["create_app"]

from ._serializers import SerializerRegistry, FeatureRegistryId

__all__ = [*__all__, "SerializerRegistry", "FeatureRegistryId"]

__version__ = "0.3.1"
@@ -0,0 +1,368 @@
1
+ from .generated.v1 import features_pb2 as features_pb2_v1
2
+ from google.protobuf.message import Message
3
+ from google.protobuf.json_format import ParseDict as ProtoParseDict
4
+ import typing as _t
5
+ from abc import ABC, abstractmethod
6
+
7
# Type variable for the concrete protobuf ``Message`` subclass a serializer
# is parameterized with.
MessageType = _t.TypeVar("MessageType", bound=Message)
8
+
9
+
10
+ class Serializer(ABC):
11
+ @abstractmethod
12
+ def serialize(self, value) -> bytes: ...
13
+
14
+ @abstractmethod
15
+ def deserialize(self, value: bytes) -> _t.Any: ...
16
+
17
+
18
class SimpleSerializer(Serializer, _t.Generic[MessageType]):
    """Generic protobuf serializer built on google's ``ParseDict``.

    Serialization goes through ``ParseDict``, which keeps the code simple but
    is very slow to run. Use this class directly for PoCs only; production
    serializers should set message fields by hand (early tests show manual
    serialization can be about 10x faster).

    Deserialization is fine as it is: the bytes are parsed straight into the
    message, so no intermediate dictionary has to be created.
    """

    def __init__(self, msg_class: type[MessageType]):
        """Store the protobuf message class this serializer instantiates."""
        self.msg_class = msg_class

    def serialize(self, value) -> bytes:
        """Populate a new ``msg_class`` instance from ``value`` and return its bytes."""
        populated = ProtoParseDict(value, message=self.msg_class())
        return populated.SerializeToString()

    def deserialize(self, value) -> MessageType:
        """Parse the serialized ``value`` into a new ``msg_class`` instance."""
        decoded: Message = self.msg_class()
        decoded.ParseFromString(value)
        return decoded
40
+
41
+
42
class StreamPWatchedSerializerV1(SimpleSerializer):
    """Serializer for ``StreamPWatched`` messages that sets fields directly."""

    def __init__(self):
        super().__init__(msg_class=features_pb2_v1.StreamPWatched)

    def serialize(self, value):
        """Return the serialized bytes of the message built from ``value``."""
        return self.build_msg(value).SerializeToString()

    def build_msg(self, value) -> features_pb2_v1.StreamPWatched:
        """Build a ``StreamPWatched`` message from a dictionary.

        ``value`` must carry ``"version"`` equal to 1 and a ``"data"`` mapping
        whose keys name sub-messages of ``message.data`` and whose values
        hold ``"attempts"`` and ``"watched"`` counts (converted with ``int``).

        Raises:
            AssertionError: if ``value["version"]`` is not 1.
        """
        message = self.msg_class()
        assert value["version"] == 1, "Wrong version given!"
        message.version = value["version"]
        for context_name, tallies in value["data"].items():
            context_msg: features_pb2_v1.EntryContextCounts = getattr(
                message.data, context_name
            )
            for count_field in ("attempts", "watched"):
                setattr(context_msg, count_field, int(tallies[count_field]))
        return message


# The same serializer class is registered for the user-level pwatched feature.
UserPWatchedSerializerV1 = StreamPWatchedSerializerV1
64
+
65
+
66
class StreamPWatchedSerializerV0(Serializer):
    """Read-only adapter that turns V0 pwatched values into V1 messages."""

    # Shared V1 serializer that builds the resulting message.
    serializer_v1 = StreamPWatchedSerializerV1()

    def serialize(self, value) -> bytes:
        """Always raises: V0 values are only ever read, never written."""
        raise NotImplementedError(
            "This serializer should never be used for serialization!"
        )

    def deserialize(self, value) -> features_pb2_v1.StreamPWatched:
        """Convert a V0 mapping of entry context -> counts into a V1 message.

        Spaces in entry-context names are replaced with underscores so the
        names can be used as attribute names by the V1 builder.
        """
        renamed = {}
        for entry_context, counts in value.items():
            renamed[entry_context.replace(" ", "_")] = counts
        return self.serializer_v1.build_msg({"data": renamed, "version": 1})
83
+
84
+
85
class StreamPSelectSerializerV1(SimpleSerializer):
    """Serializer for ``StreamPSelect`` messages that sets fields directly."""

    def __init__(self):
        super().__init__(msg_class=features_pb2_v1.StreamPSelect)

    def serialize(self, value) -> bytes:
        """Return the serialized bytes of the message built from ``value``."""
        return self.build_msg(value).SerializeToString()

    def build_msg(self, value) -> features_pb2_v1.StreamPSelect:
        """Build a ``StreamPSelect`` message from a nested dictionary.

        ``value["data"]`` maps a browsed-debias key to a mapping of position
        name -> select counts. Both keys are used as attribute names on the
        message; the three counters are converted with ``int``.

        Raises:
            AssertionError: if ``value["version"]`` is not 1.
        """
        message: features_pb2_v1.StreamPSelect = self.msg_class()
        assert value["version"] == 1, "Wrong version given!"
        message.version = 1
        counter_fields = (
            "total_selects",
            "total_browsed",
            "total_selects_and_watched",
        )
        for debias_key, positions in value["data"].items():
            positions_msg: features_pb2_v1.PositionPSelect = getattr(
                message.data, debias_key
            )
            for position_name, tallies in positions.items():
                counts_msg = getattr(positions_msg, position_name)
                for counter in counter_fields:
                    setattr(counts_msg, counter, int(tallies[counter]))
        return message


# The same serializer class is registered for the user-level pselect feature.
UserPSelectSerializerV1 = StreamPSelectSerializerV1
117
+
118
+
119
class StreamPSelectSerializerV0(Serializer):
    """Read-only adapter that turns V0 pselect values into V1 messages."""

    # Shared V1 serializer that builds the resulting message.
    serializer_v1 = StreamPSelectSerializerV1()

    # V0 position keys and the V1 field names they are renamed to.
    _POSITION_KEY_MAPPING = {
        "0": "first_pos",
        "1": "second_pos",
        "2": "third_pos",
        "3+": "rest_pos",
    }

    def serialize(self, value) -> bytes:
        """Always raises: V0 values are only ever read, never written."""
        raise NotImplementedError(
            "This serializer should never be used for serialization!"
        )

    def deserialize(self, value):
        """Convert a V0 pselect mapping into a V1 ``StreamPSelect`` message.

        ``value`` must contain the keys ``"4_browsed"`` and ``"all_browsed"``,
        each mapping V0 position keys (``"0"``, ``"1"``, ``"2"``, ``"3+"``) to
        select counts. The position keys are renamed on shallow copies, so
        the caller's dictionaries are left untouched (the previous
        implementation popped keys from them in place).

        Raises:
            KeyError: if ``"4_browsed"`` or ``"all_browsed"`` is missing.
        """
        out = {
            "data": {
                "up_to_4_browsed": self._rename_positions(value["4_browsed"]),
                "all_browsed": self._rename_positions(value["all_browsed"]),
            },
            "version": 1,
        }
        return self.serializer_v1.build_msg(value=out)

    @classmethod
    def _rename_positions(cls, position_pselects):
        """Return a shallow copy of ``position_pselects`` with V1 position names."""
        renamed = dict(position_pselects)
        for old_key, new_key in cls._POSITION_KEY_MAPPING.items():
            if old_key in renamed:
                renamed[new_key] = renamed.pop(old_key)
        return renamed
150
+
151
+
152
class StreamSimilaritySerializerV1(SimpleSerializer):
    """Serializer for ``StreamSimilarityScores`` messages."""

    def __init__(self):
        super().__init__(msg_class=features_pb2_v1.StreamSimilarityScores)

    def serialize(self, value):
        """Return the serialized bytes of the message built from ``value``."""
        return self.build_msg(value).SerializeToString()

    def build_msg(self, value) -> features_pb2_v1.StreamSimilarityScores:
        """Build the message by copying each ``value["data"]`` item into ``message.data``.

        Raises:
            AssertionError: if ``value["version"]`` is not 1.
        """
        message = self.msg_class()
        assert value["version"] == 1, "Wrong version given!"
        message.version = value["version"]
        for name, similarity in value["data"].items():
            message.data[name] = similarity
        return message
167
+
168
+
169
class StreamSimilaritySerializerV0(Serializer):
    """Read-only adapter that turns V0 similarity values into V1 messages."""

    # Shared V1 serializer that builds the resulting message.
    serializer_v1 = StreamSimilaritySerializerV1()

    def serialize(self, value):
        """Always raises: V0 values are only ever read, never written."""
        raise NotImplementedError(
            "This serializer should never be used for serialization!"
        )

    def deserialize(self, value):
        """Wrap the V0 mapping as V1 ``data`` and build the V1 message."""
        return self.serializer_v1.build_msg({"data": value, "version": 1})
181
+
182
+
183
class UserPersonalizingPWatchedSerializerV1(SimpleSerializer):
    """Serializer for ``UserPersonalizingPWatched`` messages."""

    def __init__(self):
        super().__init__(msg_class=features_pb2_v1.UserPersonalizingPWatched)

    def serialize(self, value: dict) -> bytes:
        """Return the serialized bytes of the message built from ``value``."""
        return self.build_msg(value).SerializeToString()

    def build_msg(self, value) -> features_pb2_v1.UserPersonalizingPWatched:
        """Build a ``UserPersonalizingPWatched`` message from a nested dictionary.

        ``value["data"]`` maps a personalizing key to a mapping of entry
        context -> counts. Personalizing keys index ``root_msg.data``; entry
        contexts are used as attribute names, and the ``"attempts"`` and
        ``"watched"`` counts are converted with ``int``.

        Raises:
            AssertionError: if ``value["version"]`` is not 1.
        """
        root_msg = features_pb2_v1.UserPersonalizingPWatched()
        assert value["version"] == 1, "Wrong version given!"
        root_msg.version = value["version"]
        for personalizing_key, contexts in value["data"].items():
            personalizing_msg = root_msg.data[personalizing_key]
            for context_name, tallies in contexts.items():
                context_msg = getattr(personalizing_msg, context_name)
                for count_field in ("attempts", "watched"):
                    setattr(context_msg, count_field, int(tallies[count_field]))
        return root_msg
203
+
204
+
205
class UserPersonalizingPSelectSerializerV1(SimpleSerializer):
    """Serializer for ``UserPersonalizingPSelect`` messages.

    Mirrors the other V1 serializers in this module: ``build_msg`` creates
    the message (after checking the version) and ``serialize`` returns its
    bytes.
    """

    def __init__(self):
        super().__init__(msg_class=features_pb2_v1.UserPersonalizingPSelect)

    def serialize(self, value):
        """Return the serialized bytes of the message built from ``value``."""
        return self.build_msg(value).SerializeToString()

    def build_msg(self, value) -> features_pb2_v1.UserPersonalizingPSelect:
        """Build a ``UserPersonalizingPSelect`` message from a nested dictionary.

        ``value["data"]`` maps personalizing key -> browsed-debias key ->
        position name -> select counts. Personalizing keys index
        ``root_msg.data``; the two inner keys are used as attribute names,
        and the three counters are converted with ``int``.

        Raises:
            AssertionError: if ``value["version"]`` is not 1 (same check and
                message as the sibling V1 serializers).
        """
        root_msg = self.msg_class()
        assert value["version"] == 1, "Wrong version given!"
        root_msg.version = value["version"]
        counter_fields = (
            "total_selects",
            "total_browsed",
            "total_selects_and_watched",
        )
        for personalizing_key, browsed_debiased_pselects in value["data"].items():
            personalizing_msg = root_msg.data[personalizing_key]
            for (
                browsed_debias_key,
                position_pselects,
            ) in browsed_debiased_pselects.items():
                position_pselects_msg = getattr(personalizing_msg, browsed_debias_key)
                for position, select_counts in position_pselects.items():
                    select_counts_msg = getattr(position_pselects_msg, position)
                    for counter in counter_fields:
                        setattr(select_counts_msg, counter, int(select_counts[counter]))
        return root_msg
232
+
233
+
234
class PassThroughSerializer(Serializer):
    """Serializer that returns its input unchanged in both directions."""

    def serialize(self, value):
        """Return ``value`` as-is."""
        return value

    def deserialize(self, value):
        """Return ``value`` as-is."""
        return value
240
+
241
+
242
# Module-level serializer instances; these are the objects referenced by
# ``features_serializer_tuples`` further down in this module.
user_personalizing_pwatched_serializer_v1 = UserPersonalizingPWatchedSerializerV1()
user_pwatched_serializer_v1 = UserPWatchedSerializerV1()
user_personalizing_pselect_serializer_v1 = UserPersonalizingPSelectSerializerV1()
user_pselect_serializer_v1 = UserPSelectSerializerV1()
stream_pwatched_serializer_v0 = StreamPWatchedSerializerV0()
stream_pwatched_serializer_v1 = StreamPWatchedSerializerV1()
stream_pselect_serializer_v0 = StreamPSelectSerializerV0()
stream_pselect_serializer_v1 = StreamPSelectSerializerV1()
stream_similarity_scores_serializer_v0 = StreamSimilaritySerializerV0()
stream_similarity_scores_serializer_v1 = StreamSimilaritySerializerV1()
252
+
253
+
254
class FeatureRegistryId(_t.NamedTuple):
    """Hashable key identifying one versioned feature in ``SerializerRegistry``."""

    # Kind of entity the feature belongs to. "PASS_THROUGH" is listed because
    # the registry's pass-through entry is keyed with that entity type.
    entity_type: _t.Literal["STREAM", "USER", "PASS_THROUGH"]
    # Feature identifier, e.g. "PWATCHED#24H".
    feature_id: str
    # Version tag, e.g. "v0" or "v1".
    version: str
258
+
259
+
260
# Feature id groups. Each list holds the registry keys that share a single
# serializer; the order of the entries matches the original literals.

stream_pwatched_v0_features: list[FeatureRegistryId] = [
    FeatureRegistryId(entity_type="STREAM", feature_id=name, version="v0")
    for name in ("PWATCHED#24H", "PWATCHED#24H#TV", "PWATCHED#24H#MOBILE")
]

stream_pwatched_v1_features: list[FeatureRegistryId] = [
    FeatureRegistryId(entity_type="STREAM", feature_id=name, version="v1")
    for name in ("PWATCHED#24H", "PWATCHED#24H#TV", "PWATCHED#24H#MOBILE")
]

stream_pselect_v0_features: list[FeatureRegistryId] = [
    FeatureRegistryId(entity_type="STREAM", feature_id=name, version="v0")
    for name in ("PSELECT#24H", "PSELECT#24H#MOBILE", "PSELECT#24H#TV")
]

stream_pselect_v1_features: list[FeatureRegistryId] = [
    FeatureRegistryId(entity_type="STREAM", feature_id=name, version="v1")
    for name in ("PSELECT#24H", "PSELECT#24H#MOBILE", "PSELECT#24H#TV")
]

stream_similarity_v0_features: list[FeatureRegistryId] = [
    FeatureRegistryId(entity_type="STREAM", feature_id=name, version="v0")
    for name in ("SIMILARITY", "SIMILARITY#WEATHER_ALERT")
]

stream_similarity_v1_features: list[FeatureRegistryId] = [
    FeatureRegistryId(entity_type="STREAM", feature_id=name, version="v1")
    for name in ("SIMILARITY#GEMINI", "SIMILARITY#WEATHER_ALERT")
]

user_personalizing_pwatched_v1_features: list[FeatureRegistryId] = [
    FeatureRegistryId(entity_type="USER", feature_id=name, version="v1")
    for name in (
        "PWATCHED#6M#CATEGORY",
        "PWATCHED#6M#AUTHOR_SHOW",
        "PWATCHED#6M#GEMINI_CATEGORY",
    )
]

user_personalizing_pselect_v1_features: list[FeatureRegistryId] = [
    FeatureRegistryId(entity_type="USER", feature_id=name, version="v1")
    for name in (
        "PSELECT#6M#CATEGORY",
        "PSELECT#6M#AUTHOR_SHOW",
        "PSELECT#6M#GEMINI_CATEGORY",
    )
]

user_bias_pwatched_v1_features: list[FeatureRegistryId] = [
    FeatureRegistryId("USER", "PWATCHED#6M", "v1"),
]

user_bias_pselect_v1_features: list[FeatureRegistryId] = [
    FeatureRegistryId("USER", "PSELECT#6M", "v1"),
]
343
+
344
# Pairs each group of feature ids with the serializer instance that handles
# every id in that group.
features_serializer_tuples: list[tuple[list[FeatureRegistryId], Serializer]] = [
    (stream_pwatched_v0_features, stream_pwatched_serializer_v0),
    (stream_pwatched_v1_features, stream_pwatched_serializer_v1),
    (stream_pselect_v0_features, stream_pselect_serializer_v0),
    (stream_pselect_v1_features, stream_pselect_serializer_v1),
    (stream_similarity_v0_features, stream_similarity_scores_serializer_v0),
    (stream_similarity_v1_features, stream_similarity_scores_serializer_v1),
    (
        user_personalizing_pwatched_v1_features,
        user_personalizing_pwatched_serializer_v1,
    ),
    (user_bias_pwatched_v1_features, user_pwatched_serializer_v1),
    (user_personalizing_pselect_v1_features, user_personalizing_pselect_serializer_v1),
    (user_bias_pselect_v1_features, user_pselect_serializer_v1),
]

# Lookup table from feature id to serializer. It starts with a single
# pass-through entry; the loop below adds one entry per registered feature id.
SerializerRegistry: dict[FeatureRegistryId, Serializer] = {
    FeatureRegistryId(
        entity_type="PASS_THROUGH", feature_id="PASS_THROUGH", version="v1"
    ): PassThroughSerializer()
}

# Flatten the (feature ids, serializer) pairs into the registry. Ids in the
# same group all point at the same serializer instance.
for feature_ids, serializer in features_serializer_tuples:
    for feature_id in feature_ids:
        SerializerRegistry[feature_id] = serializer