frogml 1.2.21__py3-none-any.whl → 1.2.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- frogml/__init__.py +1 -1
- frogml/_proto/qwak/model_group/model_group_repository_details_pb2.py +16 -12
- frogml/_proto/qwak/model_group/model_group_repository_details_pb2.pyi +50 -6
- frogml/core/clients/feature_store/execution_management_client.py +27 -0
- frogml/core/feature_store/execution/streaming_backfill.py +49 -0
- frogml/feature_store/feature_sets/streaming.py +84 -63
- frogml/feature_store/feature_sets/streaming_backfill.py +90 -136
- {frogml-1.2.21.dist-info → frogml-1.2.24.dist-info}/METADATA +1 -1
- {frogml-1.2.21.dist-info → frogml-1.2.24.dist-info}/RECORD +11 -10
- frogml_services_mock/mocks/execution_management_service.py +11 -1
- {frogml-1.2.21.dist-info → frogml-1.2.24.dist-info}/WHEEL +0 -0
frogml/__init__.py
CHANGED
frogml/_proto/qwak/model_group/model_group_repository_details_pb2.py
CHANGED
@@ -14,7 +14,7 @@ _sym_db = _symbol_database.Default()



-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n5qwak/model_group/model_group_repository_details.proto\x12\x1cqwak.model_groups.management\"r\n\x11RepositoryDetails\x12\x45\n\x0frepository_type\x18\x01 \x01(\x0b\x32,.qwak.model_groups.management.RepositoryType\x12\x16\n\x0erepository_key\x18\x02 \x01(\t\"\
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n5qwak/model_group/model_group_repository_details.proto\x12\x1cqwak.model_groups.management\"r\n\x11RepositoryDetails\x12\x45\n\x0frepository_type\x18\x01 \x01(\x0b\x32,.qwak.model_groups.management.RepositoryType\x12\x16\n\x0erepository_key\x18\x02 \x01(\t\"\x80\x03\n\x17RemoteRepositoryDetails\x12K\n\x11\x64ocker_repository\x18\x01 \x01(\x0b\x32..qwak.model_groups.management.DockerRepositoryH\x00\x12V\n\x17hugging_face_repository\x18\x02 \x01(\x0b\x32\x33.qwak.model_groups.management.HuggingFaceRepositoryH\x00\x12\x45\n\x0enpm_repository\x18\x04 \x01(\x0b\x32+.qwak.model_groups.management.NpmRepositoryH\x00\x12G\n\x0fpypi_repository\x18\x05 \x01(\x0b\x32,.qwak.model_groups.management.PypiRepositoryH\x00\x12\x1d\n\x15repository_remote_url\x18\x03 \x01(\tB\x11\n\x0frepository_type\"\x12\n\x10\x44ockerRepository\"\x13\n\x11\x44\x61tasetRepository\"\x14\n\x12\x41rtifactRepository\"\x17\n\x15HuggingFaceRepository\"\x0f\n\rNpmRepository\"\x10\n\x0ePypiRepository\"\xed\x03\n\x0eRepositoryType\x12K\n\x11\x64ocker_repository\x18\x01 \x01(\x0b\x32..qwak.model_groups.management.DockerRepositoryH\x00\x12M\n\x12\x64\x61taset_repository\x18\x02 \x01(\x0b\x32/.qwak.model_groups.management.DatasetRepositoryH\x00\x12O\n\x13\x61rtifact_repository\x18\x03 \x01(\x0b\x32\x30.qwak.model_groups.management.ArtifactRepositoryH\x00\x12V\n\x17hugging_face_repository\x18\x04 \x01(\x0b\x32\x33.qwak.model_groups.management.HuggingFaceRepositoryH\x00\x12\x45\n\x0enpm_repository\x18\x05 \x01(\x0b\x32+.qwak.model_groups.management.NpmRepositoryH\x00\x12G\n\x0fpypi_repository\x18\x06 \x01(\x0b\x32,.qwak.model_groups.management.PypiRepositoryH\x00\x42\x06\n\x04typeBN\n&com.qwak.ai.management.model.group.apiB\"ModelGroupRepositoriesDetailsProtoP\x01\x62\x06proto3')

 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -25,15 +25,19 @@ if _descriptor._USE_C_DESCRIPTORS == False:
   _globals['_REPOSITORYDETAILS']._serialized_start=87
   _globals['_REPOSITORYDETAILS']._serialized_end=201
   _globals['_REMOTEREPOSITORYDETAILS']._serialized_start=204
-  _globals['_REMOTEREPOSITORYDETAILS']._serialized_end=
-  _globals['_DOCKERREPOSITORY']._serialized_start=
-  _globals['_DOCKERREPOSITORY']._serialized_end=
-  _globals['_DATASETREPOSITORY']._serialized_start=
-  _globals['_DATASETREPOSITORY']._serialized_end=
-  _globals['_ARTIFACTREPOSITORY']._serialized_start=
-  _globals['_ARTIFACTREPOSITORY']._serialized_end=
-  _globals['_HUGGINGFACEREPOSITORY']._serialized_start=
-  _globals['_HUGGINGFACEREPOSITORY']._serialized_end=
-  _globals['
-  _globals['
+  _globals['_REMOTEREPOSITORYDETAILS']._serialized_end=588
+  _globals['_DOCKERREPOSITORY']._serialized_start=590
+  _globals['_DOCKERREPOSITORY']._serialized_end=608
+  _globals['_DATASETREPOSITORY']._serialized_start=610
+  _globals['_DATASETREPOSITORY']._serialized_end=629
+  _globals['_ARTIFACTREPOSITORY']._serialized_start=631
+  _globals['_ARTIFACTREPOSITORY']._serialized_end=651
+  _globals['_HUGGINGFACEREPOSITORY']._serialized_start=653
+  _globals['_HUGGINGFACEREPOSITORY']._serialized_end=676
+  _globals['_NPMREPOSITORY']._serialized_start=678
+  _globals['_NPMREPOSITORY']._serialized_end=693
+  _globals['_PYPIREPOSITORY']._serialized_start=695
+  _globals['_PYPIREPOSITORY']._serialized_end=711
+  _globals['_REPOSITORYTYPE']._serialized_start=714
+  _globals['_REPOSITORYTYPE']._serialized_end=1207
 # @@protoc_insertion_point(module_scope)
frogml/_proto/qwak/model_group/model_group_repository_details_pb2.pyi
CHANGED
@@ -39,6 +39,8 @@ class RemoteRepositoryDetails(google.protobuf.message.Message):

     DOCKER_REPOSITORY_FIELD_NUMBER: builtins.int
     HUGGING_FACE_REPOSITORY_FIELD_NUMBER: builtins.int
+    NPM_REPOSITORY_FIELD_NUMBER: builtins.int
+    PYPI_REPOSITORY_FIELD_NUMBER: builtins.int
     REPOSITORY_REMOTE_URL_FIELD_NUMBER: builtins.int
     repository_remote_url: builtins.str
     """The remote repository URL"""
@@ -50,16 +52,26 @@ class RemoteRepositoryDetails(google.protobuf.message.Message):
     def hugging_face_repository(self) -> global___HuggingFaceRepository:
         """HuggingFace repository"""

+    @property
+    def npm_repository(self) -> global___NpmRepository:
+        """NPM repository"""
+
+    @property
+    def pypi_repository(self) -> global___PypiRepository:
+        """PyPI repository"""
+
     def __init__(
         self,
         *,
         docker_repository: global___DockerRepository | None = ...,
         hugging_face_repository: global___HuggingFaceRepository | None = ...,
+        npm_repository: global___NpmRepository | None = ...,
+        pypi_repository: global___PypiRepository | None = ...,
         repository_remote_url: builtins.str = ...,
     ) -> None: ...
-    def HasField(self, field_name: typing.Literal["docker_repository", b"docker_repository", "hugging_face_repository", b"hugging_face_repository", "repository_type", b"repository_type"]) -> builtins.bool: ...
-    def ClearField(self, field_name: typing.Literal["docker_repository", b"docker_repository", "hugging_face_repository", b"hugging_face_repository", "repository_remote_url", b"repository_remote_url", "repository_type", b"repository_type"]) -> None: ...
-    def WhichOneof(self, oneof_group: typing.Literal["repository_type", b"repository_type"]) -> typing.Literal["docker_repository", "hugging_face_repository"] | None: ...
+    def HasField(self, field_name: typing.Literal["docker_repository", b"docker_repository", "hugging_face_repository", b"hugging_face_repository", "npm_repository", b"npm_repository", "pypi_repository", b"pypi_repository", "repository_type", b"repository_type"]) -> builtins.bool: ...
+    def ClearField(self, field_name: typing.Literal["docker_repository", b"docker_repository", "hugging_face_repository", b"hugging_face_repository", "npm_repository", b"npm_repository", "pypi_repository", b"pypi_repository", "repository_remote_url", b"repository_remote_url", "repository_type", b"repository_type"]) -> None: ...
+    def WhichOneof(self, oneof_group: typing.Literal["repository_type", b"repository_type"]) -> typing.Literal["docker_repository", "hugging_face_repository", "npm_repository", "pypi_repository"] | None: ...

 global___RemoteRepositoryDetails = RemoteRepositoryDetails

@@ -103,6 +115,26 @@ class HuggingFaceRepository(google.protobuf.message.Message):

 global___HuggingFaceRepository = HuggingFaceRepository

+@typing.final
+class NpmRepository(google.protobuf.message.Message):
+    DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+    def __init__(
+        self,
+    ) -> None: ...
+
+global___NpmRepository = NpmRepository
+
+@typing.final
+class PypiRepository(google.protobuf.message.Message):
+    DESCRIPTOR: google.protobuf.descriptor.Descriptor
+
+    def __init__(
+        self,
+    ) -> None: ...
+
+global___PypiRepository = PypiRepository
+
 @typing.final
 class RepositoryType(google.protobuf.message.Message):
     """The repository type"""
@@ -113,6 +145,8 @@ class RepositoryType(google.protobuf.message.Message):
     DATASET_REPOSITORY_FIELD_NUMBER: builtins.int
     ARTIFACT_REPOSITORY_FIELD_NUMBER: builtins.int
     HUGGING_FACE_REPOSITORY_FIELD_NUMBER: builtins.int
+    NPM_REPOSITORY_FIELD_NUMBER: builtins.int
+    PYPI_REPOSITORY_FIELD_NUMBER: builtins.int
     @property
     def docker_repository(self) -> global___DockerRepository:
         """Docker repository"""
@@ -129,6 +163,14 @@ class RepositoryType(google.protobuf.message.Message):
     def hugging_face_repository(self) -> global___HuggingFaceRepository:
         """HuggingFace repository"""

+    @property
+    def npm_repository(self) -> global___NpmRepository:
+        """NPM repository"""
+
+    @property
+    def pypi_repository(self) -> global___PypiRepository:
+        """PyPI repository"""
+
     def __init__(
         self,
         *,
@@ -136,9 +178,11 @@ class RepositoryType(google.protobuf.message.Message):
         dataset_repository: global___DatasetRepository | None = ...,
         artifact_repository: global___ArtifactRepository | None = ...,
         hugging_face_repository: global___HuggingFaceRepository | None = ...,
+        npm_repository: global___NpmRepository | None = ...,
+        pypi_repository: global___PypiRepository | None = ...,
     ) -> None: ...
-    def HasField(self, field_name: typing.Literal["artifact_repository", b"artifact_repository", "dataset_repository", b"dataset_repository", "docker_repository", b"docker_repository", "hugging_face_repository", b"hugging_face_repository", "type", b"type"]) -> builtins.bool: ...
-    def ClearField(self, field_name: typing.Literal["artifact_repository", b"artifact_repository", "dataset_repository", b"dataset_repository", "docker_repository", b"docker_repository", "hugging_face_repository", b"hugging_face_repository", "type", b"type"]) -> None: ...
-    def WhichOneof(self, oneof_group: typing.Literal["type", b"type"]) -> typing.Literal["docker_repository", "dataset_repository", "artifact_repository", "hugging_face_repository"] | None: ...
+    def HasField(self, field_name: typing.Literal["artifact_repository", b"artifact_repository", "dataset_repository", b"dataset_repository", "docker_repository", b"docker_repository", "hugging_face_repository", b"hugging_face_repository", "npm_repository", b"npm_repository", "pypi_repository", b"pypi_repository", "type", b"type"]) -> builtins.bool: ...
+    def ClearField(self, field_name: typing.Literal["artifact_repository", b"artifact_repository", "dataset_repository", b"dataset_repository", "docker_repository", b"docker_repository", "hugging_face_repository", b"hugging_face_repository", "npm_repository", b"npm_repository", "pypi_repository", b"pypi_repository", "type", b"type"]) -> None: ...
+    def WhichOneof(self, oneof_group: typing.Literal["type", b"type"]) -> typing.Literal["docker_repository", "dataset_repository", "artifact_repository", "hugging_face_repository", "npm_repository", "pypi_repository"] | None: ...

 global___RepositoryType = RepositoryType
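The regenerated stubs above add npm_repository and pypi_repository as new members of the existing repository_type / type oneofs. The following standalone sketch is not part of the diff; it only uses the message and field names visible in the stubs, and the repository URL is a made-up placeholder.

from frogml._proto.qwak.model_group.model_group_repository_details_pb2 import (
    PypiRepository,
    RemoteRepositoryDetails,
)

# Setting the new pypi_repository member selects it within the
# "repository_type" oneof, replacing any previously set member.
details = RemoteRepositoryDetails(
    pypi_repository=PypiRepository(),
    repository_remote_url="https://example.jfrog.io/artifactory/api/pypi/my-pypi",  # placeholder URL
)
assert details.WhichOneof("repository_type") == "pypi_repository"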
frogml/core/clients/feature_store/execution_management_client.py
CHANGED
@@ -3,6 +3,9 @@ from grpc import RpcError

 from frogml._proto.qwak.execution.v1.backfill_pb2 import BackfillSpec
 from frogml._proto.qwak.execution.v1.batch_pb2 import BatchIngestion
+from frogml._proto.qwak.execution.v1.streaming_aggregation_pb2 import (
+    StreamingAggregationBackfillIngestion,
+)
 from frogml._proto.qwak.execution.v1.execution_service_pb2 import (
     GetExecutionEntryRequest,
     GetExecutionEntryResponse,
@@ -12,6 +15,8 @@ from frogml._proto.qwak.execution.v1.execution_service_pb2 import (
     TriggerBackfillResponse,
     TriggerBatchFeaturesetRequest,
     TriggerBatchFeaturesetResponse,
+    TriggerStreamingAggregationBackfillRequest,
+    TriggerStreamingAggregationBackfillResponse,
 )
 from frogml._proto.qwak.execution.v1.execution_service_pb2_grpc import (
     FeatureStoreExecutionServiceStub,
@@ -100,3 +105,25 @@ class ExecutionManagementClient:
             raise FrogmlException(
                 f"Failed to get execution entry, error encountered {e}"
             )
+
+    def trigger_streaming_aggregation_backfill(
+        self, backfill_ingestion: StreamingAggregationBackfillIngestion
+    ) -> TriggerStreamingAggregationBackfillResponse:
+        """
+        Receives a configured streaming aggregation backfill proto and triggers a streaming aggregation backfill against the execution manager
+
+        Args:
+            backfill_ingestion (StreamingAggregationBackfillIngestion): A protobuf message
+                containing the backfill specification details
+
+        Returns:
+            TriggerStreamingAggregationBackfillResponse: response object from the execution manager
+        """
+        try:
+            return self._feature_store_execution_service.TriggerStreamingAggregationBackfill(
+                TriggerStreamingAggregationBackfillRequest(backfill=backfill_ingestion)
+            )
+        except RpcError as e:
+            raise FrogmlException(
+                f"Failed to trigger streaming aggregation backfill job, error encountered {e}"
+            )
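For orientation, here is a minimal sketch (not part of the diff) of calling the new client method directly. It assumes an already-configured frogml environment and uses only the message and field names visible elsewhere in this diff (featureset_name, start_timestamp, end_timestamp); a complete ingestion proto would also carry the transformation and data-source specs.

from datetime import datetime, timezone

from google.protobuf.timestamp_pb2 import Timestamp
from frogml._proto.qwak.execution.v1.streaming_aggregation_pb2 import (
    StreamingAggregationBackfillIngestion,
)
from frogml.core.clients.feature_store.execution_management_client import (
    ExecutionManagementClient,
)

# Build the backfill time range as protobuf timestamps.
start, end = Timestamp(), Timestamp()
start.FromDatetime(datetime(2023, 1, 1, tzinfo=timezone.utc))
end.FromDatetime(datetime(2023, 2, 1, tzinfo=timezone.utc))

ingestion = StreamingAggregationBackfillIngestion(
    featureset_name="user_streaming_agg_features",  # illustrative name
    start_timestamp=start,
    end_timestamp=end,
)

response = ExecutionManagementClient().trigger_streaming_aggregation_backfill(ingestion)
print(response.execution_id)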
frogml/core/feature_store/execution/streaming_backfill.py
ADDED
@@ -0,0 +1,49 @@
+import pathlib
+
+from frogml.core.clients.feature_store.execution_management_client import (
+    ExecutionManagementClient,
+)
+from frogml.feature_store.feature_sets.streaming_backfill import StreamingBackfill
+
+
+class StreamingAggregationBackfill:
+
+    def __init__(
+        self,
+        streaming_backfill: StreamingBackfill,
+        source_definition_path: pathlib.Path,
+    ):
+        """
+        Initialize the streaming aggregation backfill executor.
+
+        Args:
+            streaming_backfill (StreamingBackfill): Specification containing the
+                featureset name, time range, data sources, and transformation
+            source_definition_path (Path): Path to the Python file containing the backfill
+                definition. Required for locating UDF artifacts.
+        """
+        self._streaming_backfill = streaming_backfill
+        self._source_definition_path = source_definition_path
+
+    def trigger(self) -> str:
+        """
+        Triggers the streaming aggregation backfill execution.
+
+        Converts the backfill specification to proto format and sends it to
+        the execution manager to start the backfill job.
+
+        Returns:
+            str: The execution ID for tracking the backfill job status
+
+        Raises:
+            FrogmlException: If the execution manager request fails
+        """
+        backfill_proto = self._streaming_backfill._to_proto(
+            str(self._source_definition_path)
+        )
+
+        execution_client = ExecutionManagementClient()
+        response = execution_client.trigger_streaming_aggregation_backfill(
+            backfill_proto
+        )
+        return response.execution_id
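Taken together with the StreamingBackfill dataclass changes later in this diff, the new module above suggests a trigger flow along the following lines. This is a hedged sketch rather than documented usage: the featureset and data-source names are placeholders, and the chosen start/end datetimes must align with the featureset's tile size as the docstrings further down note.

import pathlib
from datetime import datetime

from frogml.core.feature_store.execution.streaming_backfill import (
    StreamingAggregationBackfill,
)
from frogml.core.feature_store.feature_sets.transformations import (
    SparkSqlTransformation,
)
from frogml.feature_store.feature_sets.streaming_backfill import (
    BackfillDataSource,
    StreamingBackfill,
)

# Describe what to backfill: featureset, time range, sources, transformation.
spec = StreamingBackfill(
    featureset_name="user_streaming_agg_features",  # placeholder
    start_datetime=datetime(2023, 1, 1),
    end_datetime=datetime(2023, 2, 1),
    data_sources=[BackfillDataSource(data_source_name="backfill_data_source")],
    transform=SparkSqlTransformation(
        "SELECT user_id, reg_country, reg_date FROM backfill_data_source"
    ),
)

# Convert the spec to proto and hand it to the execution manager.
execution_id = StreamingAggregationBackfill(
    streaming_backfill=spec,
    source_definition_path=pathlib.Path(__file__),
).trigger()
print(execution_id)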
frogml/feature_store/feature_sets/streaming.py
CHANGED
@@ -2,7 +2,7 @@ import collections
 import functools
 import inspect
 from dataclasses import dataclass, field
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import TYPE_CHECKING, List, Optional, Tuple, Union

 from typeguard import typechecked
@@ -29,6 +29,7 @@ from frogml._proto.qwak.feature_store.sources.streaming_pb2 import StreamingSour
 from frogml._proto.qwak.feature_store.sources.streaming_pb2 import (
     StreamingSource as ProtoStreamingSource,
 )
+from google.protobuf.timestamp_pb2 import Timestamp as ProtoTimestamp
 from frogml.core.clients.feature_store import FeatureRegistryClient
 from frogml.core.exceptions import FrogmlException
 from frogml.feature_store._common.artifact_utils import (
@@ -47,7 +48,7 @@ from frogml.core.feature_store.feature_sets.metadata import (
     set_metadata_on_function,
 )
 from frogml.feature_store.feature_sets.streaming_backfill import (
-
+    BackfillDataSource,
     StreamingBackfill,
 )
 from frogml.core.feature_store.feature_sets.transformations import (
@@ -87,6 +88,7 @@ def feature_set(
     key: Optional[str] = None,
     auxiliary_sinks: List[BaseSink] = [],
     repository: Optional[str] = None,
+    backfill_max_timestamp: Optional[datetime] = None,
 ):
     """
     Creates a streaming feature set for the specified entity using the given streaming data sources.
@@ -122,6 +124,11 @@ def feature_set(
     """

     def decorator(function):
+        if isinstance(function, StreamingBackfill):
+            raise FrogmlException(
+                "Backfill can no longer be defined as a decorator on the feature set, it must be triggered after feature set creation."
+            )
+
         user_transformation = function()
         FeaturesetUtils.validate_base_featureset_decorator(
             user_transformation=user_transformation, entity=entity, key=key
@@ -132,10 +139,6 @@ def feature_set(
             offline_scheduling_policy=offline_scheduling_policy,
         )

-        streaming_backfill: Optional[StreamingBackfill] = (
-            StreamingBackfill.get_streaming_backfill_from_function(function=function)
-        )
-
         fs_name = name or function.__name__
         streaming_feature_set = StreamingFeatureSet(
             name=fs_name,
@@ -162,7 +165,7 @@ def feature_set(
             online_cluster_template=getattr(
                 function, _ONLINE_CLUSTER_SPEC, ClusterTemplate.SMALL
             ),
-
+            backfill_max_timestamp=backfill_max_timestamp,
             __instance_module_path__=inspect.stack()[1].filename,
             auxiliary_sinks=auxiliary_sinks,
         )
@@ -209,55 +212,68 @@ def execution_specification(
 @typechecked
 def backfill(
     *,
-
-
-
-
+    feature_set_name: str,
+    start_date: Optional[datetime],
+    end_date: Optional[datetime],
+    data_sources: Union[List[str], List[BackfillDataSource]],
     backfill_cluster_template: Optional[ClusterTemplate] = ClusterTemplate.SMALL,
 ):
     """
-
+    Triggers a backfill execution for an existing streaming featureset. Currently available for streaming
     aggregation featuresets only.

-    :
-
-
-
-
-
-
-
-
-
-
-        data_sources=["users_registration_stream"],
-        timestamp_column_name="reg_date"
-    )
+    Args:
+        feature_set_name (str): Name of the FeatureSet to trigger a backfill for.
+        start_date (datetime): Backfill start date, on Streaming Aggregation Feature Sets,
+            needs to align with the FeatureSet tiles.
+        end_date (datetime): Backfill end date, on Streaming Aggregation Feature Sets,
+            needs to align with the FeatureSet tiles and be smaller than the Feature Set's backfill_max_timestamp.
+        data_sources (list[BackfillDataSource] | list[str]): A list of BackfillDataSource objects containing
+            batch source name and optional time range, or a list of batch source names (with no time range limits).
+        backfill_cluster_template (ClusterTemplate, optional): An optional cluster specification for the backfill job.
+            Defaults to SMALL.
+
+    Examples:
     @streaming.backfill(
-
-
-
-
+        feature_set_name="user_streaming_agg_features",
+        start_date=datetime(2022,1,1,0,0,0),
+        end_date=datetime(2023,9,1,0,0,0),
+        data_sources=[BackfillDataSource(data_source_name="backfill_data_source",
+                                         start_datetime=datetime(2023,1,1,0,0,0),
+                                         end_datetime=datetime(2023,8,1,0,0,0))],
         backfill_cluster_template=ClusterTemplate.SMALL
-        backfill_transformation=SparkSqlTransformation("SELECT user_id, reg_country, reg_date FROM backfill_data_source")
     )
-    def
-        return SparkSqlTransformation("SELECT user_id, reg_country, reg_date FROM
+    def backfill_transformation():
+        return SparkSqlTransformation("SELECT user_id, reg_country, reg_date FROM backfill_data_source")
     """

     def decorator(function):
-
-
-
-
-
-
-
-
+        if isinstance(function, StreamingFeatureSet):
+            raise FrogmlException(
+                "Backfill can no longer be defined as a decorator on the feature set, it must be triggered after feature set creation."
+            )
+
+        backfill_transformation: SparkSqlTransformation = function()
+
+        if not isinstance(backfill_transformation, SparkSqlTransformation):
+            raise FrogmlException(
+                "Backfill must defined on a method returning a SparkSqlTransformation"
+            )
+
+        streaming_backfill = StreamingBackfill(
+            featureset_name=feature_set_name,
+            start_datetime=start_date,
+            end_datetime=end_date,
+            data_sources=StreamingBackfill._get_normalized_backfill_sources_spec(
+                data_sources
+            ),
+            transform=backfill_transformation,
+            cluster_template=backfill_cluster_template,
         )

-
+        functools.update_wrapper(streaming_backfill, backfill_transformation)
+
+        return streaming_backfill

     return decorator

@@ -328,7 +344,7 @@ class StreamingFeatureSet(BaseFeatureSet):
     offline_cluster_template: Optional[ClusterTemplate] = None
     online_cluster_template: Optional[ClusterTemplate] = None
     metadata: Optional[Metadata] = None
-
+    backfill_max_timestamp: Optional[StreamingBackfill] = None
     auxiliary_sinks: List[BaseSink] = field(default_factory=lambda: [])

     def __post_init__(self):
@@ -411,7 +427,6 @@ class StreamingFeatureSet(BaseFeatureSet):
             proto_featureset_type = self._get_streaming_aggregation_featureset_proto(
                 artifact_url=artifact_url,
                 streaming_sources=data_sources,
-                feature_registry=feature_registry,
                 initial_tile_size=maybe_initial_tile_size,
             )

@@ -465,10 +480,9 @@ class StreamingFeatureSet(BaseFeatureSet):
                 "Auxiliary Sinks Are not supported in Streaming Aggregation Feature Sets"
             )

-
-        if self.backfill and not is_streaming_agg:
+        if self.backfill_max_timestamp and not is_streaming_agg:
             raise FrogmlException(
-                "
+                "backfill_max_timestamp can only be set for Streaming Aggregation FeatureSet."
             )

         # Validate transformation is PySpark when multiple data sources are used
@@ -527,18 +541,29 @@ class StreamingFeatureSet(BaseFeatureSet):
             )
             raise FrogmlException(error_message_str)

-
-
-
+        if not self.backfill_max_timestamp:
+            raise FrogmlException(
+                """
+                backfill_max_timestamp must be set for Streaming Aggregation FeatureSet.
+                Events earlier than this timestamp can only be processed by triggering backfill,
+                the Streaming job will not process events that are earlier than this timestamp.
+                """
+            )
+
+        self._validate_streaming_aggregation_backfill_max_timestamp()

         return initial_tile_size

-    def
+    def _validate_streaming_aggregation_backfill_max_timestamp(self):
         initial_tile_size, _ = StreamingFeatureSet._get_default_slide_period(
             self.transformation.windows
         )

-        self.
+        if self.backfill_max_timestamp.timestamp() % initial_tile_size != 0:
+            raise FrogmlException(
+                f"Chosen backfill max timestamp is invalid,"
+                f" it has to be exactly dividable by slice size of {initial_tile_size} seconds."
+            )

     @staticmethod
     def _get_default_slide_period(
@@ -608,9 +633,12 @@ class StreamingFeatureSet(BaseFeatureSet):
         self,
         artifact_url: Optional[str],
         streaming_sources: List[StreamingSource],
-        feature_registry: FeatureRegistryClient,
         initial_tile_size: int,
     ) -> ProtoFeatureSetType:
+        backfill_max_timestamp = ProtoTimestamp()
+        backfill_max_timestamp.FromDatetime(
+            self.backfill_max_timestamp.astimezone(timezone.utc)
+        )
         return ProtoFeatureSetType(
             streaming_aggregation_feature_set=ProtoStreamingAggregationFeatureSet(
                 transformation=self.transformation._to_proto(
@@ -633,14 +661,7 @@ class StreamingFeatureSet(BaseFeatureSet):
                 allowed_late_arrival_seconds=60 * 10,
                 aggregations=self.transformation._get_aggregations_proto(),
                 ),
-                backfill_spec=
-
-                    feature_registry=feature_registry,
-                    original_instance_module_path=self.__instance_module_path__,
-                    featureset_name=self.name,
-                    )
-                    if self.backfill
-                    else None
-                ),
+                backfill_spec=None,
+                backfill_max_timestamp=backfill_max_timestamp,
             )
         )
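The backfill docstring above already carries an example; for completeness, here is the same decorator-based flow written out as a runnable sketch. It assumes the streaming module is imported from frogml.feature_store.feature_sets and that a streaming aggregation featureset named "user_streaming_agg_features" already exists; applying the decorator builds and returns a StreamingBackfill object rather than triggering anything by itself.

from datetime import datetime

from frogml.core.feature_store.feature_sets.execution_spec import ClusterTemplate
from frogml.core.feature_store.feature_sets.transformations import (
    SparkSqlTransformation,
)
from frogml.feature_store.feature_sets import streaming
from frogml.feature_store.feature_sets.streaming_backfill import BackfillDataSource

@streaming.backfill(
    feature_set_name="user_streaming_agg_features",  # existing featureset, placeholder name
    start_date=datetime(2022, 1, 1),
    end_date=datetime(2023, 9, 1),
    data_sources=[
        BackfillDataSource(
            data_source_name="backfill_data_source",
            start_datetime=datetime(2023, 1, 1),
            end_datetime=datetime(2023, 8, 1),
        )
    ],
    backfill_cluster_template=ClusterTemplate.SMALL,
)
def backfill_transformation():
    # The decorated function must return a SparkSqlTransformation.
    return SparkSqlTransformation(
        "SELECT user_id, reg_country, reg_date FROM backfill_data_source"
    )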
frogml/feature_store/feature_sets/streaming_backfill.py
CHANGED
@@ -1,34 +1,18 @@
-from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from typing import List, Optional, Set, Union

-from google.protobuf.timestamp_pb2 import Timestamp as ProtoTimestamp
-
 from frogml._proto.qwak.feature_store.features.execution_pb2 import (
-
-)
-from frogml._proto.qwak.feature_store.features.feature_set_types_pb2 import (
-    BackfillBatchDataSourceSpec as ProtoBackfillBatchDataSourceSpec,
-)
-from frogml._proto.qwak.feature_store.features.feature_set_types_pb2 import (
-    BackfillDataSourceSpec as ProtoBackfillDataSourceSpec,
+    ExecutionSpec as ProtoExecutionSpec,
 )
-from frogml._proto.qwak.
-
+from frogml._proto.qwak.execution.v1.streaming_aggregation_pb2 import (
+    StreamingAggregationBackfillIngestion as ProtoStreamingAggregationBackfillIngestion,
+    BackfillDataSource as ProtoBackfillDataSource,
+    TimeRange as ProtoTimeRange,
 )
-from
-    BatchSource as ProtoBatchSource,
-)
-from frogml.core.clients.feature_store import FeatureRegistryClient
+from google.protobuf.timestamp_pb2 import Timestamp as ProtoTimestamp
 from frogml.core.exceptions import FrogmlException
-from frogml.feature_store._common.artifact_utils import
-    ArtifactSpec,
-    ArtifactsUploader,
-)
-from frogml.feature_store._common.feature_set_utils import (
-    get_batch_source_for_featureset,
-)
+from frogml.feature_store._common.artifact_utils import ArtifactSpec, ArtifactsUploader
 from frogml.core.feature_store.feature_sets.execution_spec import ClusterTemplate
 from frogml.core.feature_store.feature_sets.transformations import (
     SparkSqlTransformation,
@@ -38,36 +22,15 @@ _BACKFILL_ = "_qwak_backfill_specification"


 @dataclass
-class
+class BackfillDataSource:
     data_source_name: str
-
-    @abstractmethod
-    def _to_proto(self, feature_registry: FeatureRegistryClient):
-        pass
-
-    @classmethod
-    def _from_proto(cls, proto: ProtoBackfillDataSourceSpec):
-        function_mapping = {"batch_data_source_spec": BackfillBatchDataSourceSpec}
-
-        backfill_source_type: str = proto.WhichOneof("type")
-
-        if backfill_source_type in function_mapping:
-            function_class = function_mapping.get(backfill_source_type)
-            return function_class._from_proto(proto)
-
-        raise FrogmlException(
-            f"Got unsupported backfill source type {backfill_source_type} for streaming backfill"
-        )
-
-
-@dataclass
-class BackfillBatchDataSourceSpec(DataSourceBackfillSpec):
     start_datetime: Optional[datetime] = None
     end_datetime: Optional[datetime] = None

-    def
-        self
-
+    def __post_init__(self):
+        self._validate()
+
+    def _to_proto(self) -> ProtoBackfillDataSource:
         start_timestamp: Optional[ProtoTimestamp] = None
         end_timestamp: Optional[ProtoTimestamp] = None

@@ -79,63 +42,94 @@ class BackfillBatchDataSourceSpec(DataSourceBackfillSpec):
             start_timestamp = ProtoTimestamp()
             start_timestamp.FromDatetime(self.start_datetime.astimezone(timezone.utc))

-
-            batch_ds_name=self.data_source_name, feature_registry=feature_registry
-        )
-
-        return ProtoBackfillBatchDataSourceSpec(
-            data_source=proto_data_source,
+        time_range = ProtoTimeRange(
             start_timestamp=start_timestamp,
             end_timestamp=end_timestamp,
         )

+        return ProtoBackfillDataSource(
+            data_source_name=self.data_source_name,
+            time_range=time_range,
+        )
+
     @classmethod
-    def _from_proto(
-        cls, proto: ProtoBackfillDataSourceSpec
-    ) -> "BackfillBatchDataSourceSpec":
+    def _from_proto(cls, proto: ProtoBackfillDataSource) -> "BackfillDataSource":
         start_datetime: Optional[datetime] = None
         end_datetime: Optional[datetime] = None

-
-            proto.batch_data_source_spec
-        )
+        time_range: ProtoTimeRange = proto.time_range

-        proto_start_timestamp: ProtoTimestamp =
-
+        proto_start_timestamp: Optional[ProtoTimestamp] = (
+            time_range.start_timestamp if time_range.start_timestamp else None
+        )
+        proto_end_timestamp: Optional[ProtoTimestamp] = (
+            time_range.end_timestamp if time_range.end_timestamp else None
+        )

-        start_datetime =
-
+        start_datetime = (
+            datetime.fromtimestamp(
+                proto_start_timestamp.seconds + proto_start_timestamp.nanos / 1e9
+            )
+            if proto_start_timestamp
+            else None
        )

-        end_datetime =
-
+        end_datetime = (
+            datetime.fromtimestamp(
+                proto_end_timestamp.seconds + proto_end_timestamp.nanos / 1e9
+            )
+            if proto_end_timestamp
+            else None
        )

         return cls(
-            data_source_name=
+            data_source_name=proto.data_source_name,
             start_datetime=start_datetime,
             end_datetime=end_datetime,
         )

+    def _validate(self):
+        if self.start_datetime and self.end_datetime:
+            if self.start_datetime >= self.end_datetime:
+                raise FrogmlException(
+                    f"Backfill data source {self.data_source_name} has invalid time range: "
+                    f"start_datetime {self.start_datetime} is after or equal end_datetime {self.end_datetime}."
+                )
+
+        if not self.data_source_name:
+            raise FrogmlException(
+                "Backfill data source must have a valid data source name."
+            )
+

 @dataclass
 class StreamingBackfill:
+    featureset_name: str
     start_datetime: datetime
     end_datetime: datetime
-
+    data_sources: List[BackfillDataSource]
     transform: "SparkSqlTransformation"
     cluster_template: Optional[ClusterTemplate] = ClusterTemplate.SMALL

     def __post_init__(self):
-        if not self.
+        if not self.featureset_name:
+            raise FrogmlException("featureset_name must be provided for backfill.")
+
+        if not self.start_datetime or not self.end_datetime:
             raise FrogmlException(
-                "
-                "At least one data source has to be provided when trying to create a streaming backfill."
+                "For streaming aggregation backfill, both start_datetime and end_datetime are mandatory fields."
             )

-        if
+        if self.start_datetime >= self.end_datetime:
             raise FrogmlException(
-                "
+                f"Backfill has invalid time range: "
+                f"start_datetime {self.start_datetime} is after or equal end_datetime {self.end_datetime}."
+            )
+
+        if not self.data_sources:
+            raise FrogmlException(
+                "Trying to create a streaming backfill with no data sources. "
+                "At least one data source has to be provided when trying to create a streaming backfill."
             )

         if type(self.transform) is not SparkSqlTransformation:
@@ -147,7 +141,7 @@ class StreamingBackfill:

     def _validate_unique_sources(self):
         source_names: List[str] = [
-            data_source.data_source_name for data_source in self.
+            data_source.data_source_name for data_source in self.data_sources
         ]
         duplicates: Set[str] = {
             item for item in source_names if source_names.count(item) > 1
@@ -158,23 +152,14 @@ class StreamingBackfill:
                 f"Found these duplicates: {', '.join(set(duplicates))}"
             )

-    def _validate_tile_size(self, initial_tile_size: int):
-        if self.end_datetime.timestamp() % initial_tile_size != 0:
-            raise FrogmlException(
-                f"Chosen backfill end datetime is invalid,"
-                f" it has to be exactly dividable by slice size of {initial_tile_size} seconds."
-            )
-
     def _to_proto(
         self,
-        feature_registry: FeatureRegistryClient,
-        featureset_name: str,
         original_instance_module_path: str,
-    ) ->
+    ) -> ProtoStreamingAggregationBackfillIngestion:
         artifact_url: Optional[str] = None
         artifact_spec: Optional[ArtifactSpec] = ArtifactsUploader.get_artifact_spec(
             transformation=self.transform,
-            featureset_name=f"{featureset_name}-backfill",
+            featureset_name=f"{self.featureset_name}-backfill",
             __instance_module_path__=original_instance_module_path,
         )

@@ -187,85 +172,54 @@ class StreamingBackfill:
         start_timestamp = ProtoTimestamp()
         start_timestamp.FromDatetime(self.start_datetime.astimezone(timezone.utc))

-        return
+        return ProtoStreamingAggregationBackfillIngestion(
+            featureset_name=self.featureset_name,
             start_timestamp=start_timestamp,
             end_timestamp=end_timestamp,
-            execution_spec=
-
+            execution_spec=ProtoExecutionSpec(
+                cluster_template=ClusterTemplate.to_proto(self.cluster_template)
             ),
             transformation=self.transform._to_proto(artifact_path=artifact_url),
             data_source_specs=[
-
-                    batch_data_source_spec=data_source_spec._to_proto(
-                        feature_registry=feature_registry
-                    )
-                )
-                for data_source_spec in self.data_sources_specs
+                data_source._to_proto() for data_source in self.data_sources
             ],
         )

     @classmethod
-    def _from_proto(cls, proto:
-
-
-    )
-
-        data_sources_specs = [
-            BackfillBatchDataSourceSpec._from_proto(ds)
-            for ds in proto.data_source_specs
+    def _from_proto(cls, proto: ProtoStreamingAggregationBackfillIngestion):
+        backfill_data_sources = [
+            BackfillDataSource._from_proto(ds) for ds in proto.data_source_specs
         ]

         return cls(
+            featureset_name=proto.featureset_name,
             start_datetime=datetime.fromtimestamp(
                 proto.start_timestamp.seconds + proto.start_timestamp.nanos / 1e9
             ),
             end_datetime=datetime.fromtimestamp(
                 proto.end_timestamp.seconds + proto.end_timestamp.nanos / 1e9
             ),
-
+            data_sources=backfill_data_sources,
             transform=SparkSqlTransformation._from_proto(
                 proto.transformation.sql_transformation
             ),
+            cluster_template=(
+                ClusterTemplate.from_proto(proto.execution_spec.cluster_template)
+                if proto.execution_spec.cluster_template
+                else None
+            ),
         )

     @staticmethod
     def _get_normalized_backfill_sources_spec(
-        data_sources: Union[List[str], List[
-    ) -> List[
-        # reformat all data source
+        data_sources: Union[List[str], List[BackfillDataSource]],
+    ) -> List[BackfillDataSource]:
+        # reformat all data source names to 'BackfillDataSource'
         return [
             (
-
+                BackfillDataSource(data_source_name=data_source)
                 if isinstance(data_source, str)
                 else data_source
             )
             for data_source in data_sources
         ]
-
-    @classmethod
-    def set_streaming_backfill_on_function(
-        cls,
-        function,
-        start_date: datetime,
-        end_date: datetime,
-        data_sources: Union[List[str], List[DataSourceBackfillSpec]],
-        backfill_transformation: SparkSqlTransformation,
-        backfill_cluster_template: Optional[ClusterTemplate] = ClusterTemplate.SMALL,
-    ):
-        setattr(
-            function,
-            _BACKFILL_,
-            cls(
-                start_datetime=start_date,
-                end_datetime=end_date,
-                data_sources_specs=StreamingBackfill._get_normalized_backfill_sources_spec(
-                    data_sources
-                ),
-                transform=backfill_transformation,
-                cluster_template=backfill_cluster_template,
-            ),
-        )
-
-    @staticmethod
-    def get_streaming_backfill_from_function(function):
-        return getattr(function, _BACKFILL_, None)
{frogml-1.2.21.dist-info → frogml-1.2.24.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-frogml/__init__.py,sha256=
+frogml/__init__.py,sha256=SYhABGZ4g5cE2oC884FNfK9iaaNnmOzfeua9kts4jlk,741
 frogml/_proto/jfml/hosting_gateway/v1/build_upload_url_pb2.py,sha256=pY7-QarLJWbL9uNmZ69RfE7IPVzwpVIhtd3A0wztSNY,1942
 frogml/_proto/jfml/hosting_gateway/v1/build_upload_url_pb2.pyi,sha256=505I4uUrCmv2hsUw2H5eT-nlkNOCQOWodpvEkH2fHIo,3073
 frogml/_proto/jfml/hosting_gateway/v1/build_upload_url_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
@@ -542,8 +542,8 @@ frogml/_proto/qwak/model_descriptor/open_ai_descriptor_pb2_grpc.py,sha256=1oboBP
 frogml/_proto/qwak/model_group/model_group_pb2.py,sha256=vKhDaG_NnEMNxqGSemEpHATsSWUS9nhl5Xaz9MhPIfg,5317
 frogml/_proto/qwak/model_group/model_group_pb2.pyi,sha256=OxpEeUGJXQACiiCbAAWDSkBuTSsT7qnKcH-3A-oA7wQ,9144
 frogml/_proto/qwak/model_group/model_group_pb2_grpc.py,sha256=YgDt3DtagEdZ7UtxcP5iuoJeFDcVwQOrOU9o_tk4vLs,9258
-frogml/_proto/qwak/model_group/model_group_repository_details_pb2.py,sha256=
-frogml/_proto/qwak/model_group/model_group_repository_details_pb2.pyi,sha256=
+frogml/_proto/qwak/model_group/model_group_repository_details_pb2.py,sha256=BxaVjDayd9h5Va2ZId3pNPeJFAqVXb8TJHIWmYsURN4,3806
+frogml/_proto/qwak/model_group/model_group_repository_details_pb2.pyi,sha256=3oCsYmmfvBjGkA1nRcx2cg11WNr40uuR3gAWXC7Wz2I,7245
 frogml/_proto/qwak/model_group/model_group_repository_details_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
 frogml/_proto/qwak/models/models_pb2.py,sha256=XF3-e8xW62ymbg7xGRCwc4fNsgXV2UZJ1lPgv47Ylkk,19046
 frogml/_proto/qwak/models/models_pb2.pyi,sha256=m2NGA7K3Jz3YFp8nOW4l_AB5kgjJUEDmr3xMUPYu3Dc,42638
@@ -672,7 +672,7 @@ frogml/core/clients/data_versioning/data_tag_filter.py,sha256=5YK031WGNrvkydrruW
 frogml/core/clients/deployment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 frogml/core/clients/deployment/client.py,sha256=mMwguoxysULkue-ABYQwVdl-RtUE6VtD-ulq2IpwD-A,6883
 frogml/core/clients/feature_store/__init__.py,sha256=mMCPBHDga6Y7dtJfNoHvfOvCyjNUHrVDX5uVsL2JkGk,53
-frogml/core/clients/feature_store/execution_management_client.py,sha256=
+frogml/core/clients/feature_store/execution_management_client.py,sha256=xftjUoCfgmQs1IuXprxgiYDyW_RCrOxdOWVFCfVEQQk,5343
 frogml/core/clients/feature_store/job_registry_client.py,sha256=Rhb5YLvu9JmPpgwsK2piZYxi4OosSaFfFymUnrp67as,2670
 frogml/core/clients/feature_store/management_client.py,sha256=o0VtaxAq0SFq2s1sEW2LmoHoTI8e5xS63ZRGjtoMiIw,18284
 frogml/core/clients/feature_store/offline_serving_client.py,sha256=yRLAmdOscqGOo4w_DoeAR6GaakOCSxw_378tggkum9I,9377
@@ -743,6 +743,7 @@ frogml/core/feature_store/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
 frogml/core/feature_store/execution/backfill.py,sha256=35h8Q82AlPSN09kgIR5iyHEg-0yK2AvUOcxthpWF7qo,6716
 frogml/core/feature_store/execution/execution.py,sha256=CgPJLPFRhBI8pQiAOkDSO0OtKIAQat3kbaJ_NsFB-iE,21324
 frogml/core/feature_store/execution/execution_query.py,sha256=eTMxx-SmyjDPoBZwqJTJn-qs1TfkRgs3akvN_kMId1Y,3646
+frogml/core/feature_store/execution/streaming_backfill.py,sha256=xzba3rjHh-RolA7nw70dgC6BgOUTRcwjyWYUQakLVmA,1695
 frogml/core/feature_store/feature_sets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 frogml/core/feature_store/feature_sets/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 frogml/core/feature_store/feature_sets/_utils/_featureset_utils.py,sha256=xWy-7_ai9OA7tCkQIrGwdqjZHKXrtQgmqHxHq1l3afo,1658
@@ -903,8 +904,8 @@ frogml/feature_store/data_sources/streaming/kafka/kafka.py,sha256=Bo_yxjNcP-IJ4T
 frogml/feature_store/feature_sets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 frogml/feature_store/feature_sets/base_feature_set.py,sha256=Rk8ZWjQigPnLBERePXjdJT_Pf1VzF9js7bPLnPCCYLM,5454
 frogml/feature_store/feature_sets/batch.py,sha256=JHEybhYZuVLFeJ7MiXGYHCITfldUpw9El6BcGCIEPzg,17626
-frogml/feature_store/feature_sets/streaming.py,sha256=
-frogml/feature_store/feature_sets/streaming_backfill.py,sha256=
+frogml/feature_store/feature_sets/streaming.py,sha256=1f63h_YP-qDDYwNCTZiG5M0Gh1iHRLvo0QoFdPyA2fc,26915
+frogml/feature_store/feature_sets/streaming_backfill.py,sha256=vjgFGjYNnER7a7awG56dtFQ5U4zKhTNSt805SRNEtaQ,8351
 frogml/feature_store/offline/__init__.py,sha256=TUu3VuxdbVrYpWD9B2LDno1HfoGbI2X4fHw7vjSJ2uk,98
 frogml/feature_store/offline/client_v2.py,sha256=33CPNWMmZGw8nVet5TrJa5G0hBr93SVsgVKbdiTHK9I,14841
 frogml/feature_store/online/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -1116,7 +1117,7 @@ frogml_services_mock/mocks/build_orchestrator_service_api.py,sha256=7mZzOahph29Z
 frogml_services_mock/mocks/data_versioning_service.py,sha256=8DzV5oxH7DXZAu_ZBiPEwW9m1AwbOlYOO1PFPjkq_Dk,2470
 frogml_services_mock/mocks/deployment_management_service.py,sha256=wofBAw-2NKToTiFYxHqjR3QDrvplpV8NWNZMiIX6U_Q,20583
 frogml_services_mock/mocks/ecosystem_service_api.py,sha256=saJYdT43nGVNyozWeDVc6HIXAsNvCdU5J1i-NNrnOr4,2089
-frogml_services_mock/mocks/execution_management_service.py,sha256=
+frogml_services_mock/mocks/execution_management_service.py,sha256=pRX_DWzayMhwbw7zQVRx2blVpiubqVWpZng2Xn8ZgI0,1205
 frogml_services_mock/mocks/feature_store_data_sources_manager_api.py,sha256=YNd-0abshQH6bfrIEr1z0awIde_SYabfuZHV83LmcJ4,5249
 frogml_services_mock/mocks/feature_store_entities_manager_api.py,sha256=fF3hhJvbd7PDPyghvqtiJudISOR1GasP6MDs5X44Tic,3355
 frogml_services_mock/mocks/feature_store_feature_set_manager_api.py,sha256=ghTbNTVj-FjSV63AYdha-Z7GCX0Bm6PDT6xYzUa5DVA,10935
@@ -1149,6 +1150,6 @@ frogml_services_mock/mocks/utils/exception_handlers.py,sha256=k_8mez3cwjNjKE9yGQ
 frogml_services_mock/services_mock.py,sha256=y1_h189Ldu5MdVUW4nj-WMBtvUfkM_aKse7UfAb6Rxk,19419
 frogml_services_mock/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 frogml_services_mock/utils/service_utils.py,sha256=ZlB0CnB1J6oBn6_m7fQO2U8tKoboHdUa6ljjkRMYNXU,265
-frogml-1.2.
-frogml-1.2.
-frogml-1.2.
+frogml-1.2.24.dist-info/METADATA,sha256=lor0gL06kcVaM0GVKtB0FIf2CQz1sRCiwu9s95pgyrI,5599
+frogml-1.2.24.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+frogml-1.2.24.dist-info/RECORD,,
frogml_services_mock/mocks/execution_management_service.py
CHANGED
@@ -1,6 +1,9 @@
 from grpc import RpcError

-from frogml._proto.qwak.execution.v1.execution_service_pb2 import
+from frogml._proto.qwak.execution.v1.execution_service_pb2 import (
+    TriggerBackfillResponse,
+    TriggerStreamingAggregationBackfillResponse,
+)
 from frogml._proto.qwak.execution.v1.execution_service_pb2_grpc import (
     FeatureStoreExecutionServiceServicer,
 )
@@ -24,3 +27,10 @@ class ExecutionManagementServiceMock(FeatureStoreExecutionServiceServicer):
         if self._raise_exception_on_request:
             raise RpcError
         return TriggerBackfillResponse(execution_id=self._execution_id)
+
+    def TriggerStreamingAggregationBackfill(self, request, context):
+        if self._raise_exception_on_request:
+            raise RpcError
+        return TriggerStreamingAggregationBackfillResponse(
+            execution_id=self._execution_id
+        )
{frogml-1.2.21.dist-info → frogml-1.2.24.dist-info}/WHEEL
File without changes