qwak-core 0.4.378__py3-none-any.whl → 0.5.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _qwak_proto/qwak/administration/account/v1/account_pb2.py +20 -18
- _qwak_proto/qwak/administration/account/v1/account_pb2.pyi +21 -2
- _qwak_proto/qwak/admiral/secret/v0/secret_pb2.py +16 -14
- _qwak_proto/qwak/admiral/secret/v0/secret_pb2.pyi +21 -2
- _qwak_proto/qwak/builds/build_values_pb2.py +24 -18
- _qwak_proto/qwak/builds/build_values_pb2.pyi +21 -1
- _qwak_proto/qwak/execution/v1/streaming_aggregation_pb2.py +18 -11
- _qwak_proto/qwak/execution/v1/streaming_aggregation_pb2.pyi +71 -1
- _qwak_proto/qwak/feature_store/features/feature_set_pb2.py +4 -4
- _qwak_proto/qwak/feature_store/features/feature_set_pb2.pyi +4 -0
- _qwak_proto/qwak/feature_store/features/feature_set_types_pb2.py +60 -58
- _qwak_proto/qwak/feature_store/features/feature_set_types_pb2.pyi +7 -2
- _qwak_proto/qwak/kube_deployment_captain/batch_job_pb2.py +40 -40
- _qwak_proto/qwak/kube_deployment_captain/batch_job_pb2.pyi +7 -1
- _qwak_proto/qwak/model_group/model_group_repository_details_pb2.py +16 -12
- _qwak_proto/qwak/model_group/model_group_repository_details_pb2.pyi +44 -6
- _qwak_proto/qwak/projects/projects_pb2.py +17 -15
- _qwak_proto/qwak/secret_service/secret_service_pb2.pyi +1 -1
- qwak/__init__.py +1 -1
- qwak/clients/feature_store/execution_management_client.py +28 -0
- qwak/exceptions/__init__.py +1 -0
- qwak/exceptions/qwak_grpc_address_exception.py +9 -0
- qwak/feature_store/execution/streaming_backfill.py +48 -0
- qwak/feature_store/feature_sets/streaming.py +84 -63
- qwak/feature_store/feature_sets/streaming_backfill.py +88 -124
- qwak/inner/const.py +2 -6
- qwak/inner/di_configuration/__init__.py +1 -67
- qwak/inner/di_configuration/dependency_wiring.py +98 -0
- qwak/inner/tool/grpc/grpc_tools.py +123 -3
- qwak/llmops/generation/chat/openai/types/chat/chat_completion.py +24 -6
- qwak/llmops/generation/chat/openai/types/chat/chat_completion_chunk.py +44 -8
- qwak/llmops/generation/chat/openai/types/chat/chat_completion_message.py +6 -3
- {qwak_core-0.4.378.dist-info → qwak_core-0.5.12.dist-info}/METADATA +4 -6
- {qwak_core-0.4.378.dist-info → qwak_core-0.5.12.dist-info}/RECORD +36 -33
- qwak_services_mock/mocks/execution_management_service.py +9 -1
- {qwak_core-0.4.378.dist-info → qwak_core-0.5.12.dist-info}/WHEEL +0 -0
|
@@ -11,12 +11,12 @@ from google.protobuf.internal import builder as _builder
|
|
|
11
11
|
_sym_db = _symbol_database.Default()
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
|
|
14
15
|
from _qwak_proto.qwak.models import models_pb2 as qwak_dot_models_dot_models__pb2
|
|
15
16
|
from _qwak_proto.qwak.projects import jfrog_project_spec_pb2 as qwak_dot_projects_dot_jfrog__project__spec__pb2
|
|
16
|
-
from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1cqwak/projects/projects.proto\x12\x18qwak.projects.management\x1a\x18qwak/models/models.proto\x1a&qwak/projects/jfrog_project_spec.proto\
|
|
19
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1cqwak/projects/projects.proto\x12\x18qwak.projects.management\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x18qwak/models/models.proto\x1a&qwak/projects/jfrog_project_spec.proto\"\xad\x03\n\x0eProjectRequest\x12P\n\x16\x63reate_project_request\x18\x01 \x01(\x0b\x32..qwak.projects.management.CreateProjectRequestH\x00\x12P\n\x16\x64\x65lete_project_request\x18\x02 \x01(\x0b\x32..qwak.projects.management.DeleteProjectRequestH\x00\x12P\n\x16update_project_request\x18\x03 \x01(\x0b\x32..qwak.projects.management.UpdateProjectRequestH\x00\x12J\n\x13get_project_request\x18\x04 \x01(\x0b\x32+.qwak.projects.management.GetProjectRequestH\x00\x12N\n\x15list_projects_request\x18\x05 \x01(\x0b\x32-.qwak.projects.management.ListProjectsRequestH\x00\x42\t\n\x07request\"\x91\x01\n\x14\x43reateProjectRequest\x12\x14\n\x0cproject_name\x18\x01 \x01(\t\x12\x1b\n\x13project_description\x18\x02 \x01(\t\x12\x46\n\njfrog_spec\x18\x03 \x01(\x0b\x32\x32.qwak.projects.management.ModelRepositoryJFrogSpec\"O\n\x15\x43reateProjectResponse\x12\x36\n\x07project\x18\x01 \x01(\x0b\x32%.qwak.projects.management.ProjectSpec\"\x15\n\x13ListProjectsRequest\"O\n\x14ListProjectsResponse\x12\x37\n\x08projects\x18\x01 \x03(\x0b\x32%.qwak.projects.management.ProjectSpec\"=\n\x11GetProjectRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x14\n\x0cproject_name\x18\x02 \x01(\t\"H\n\x12GetProjectResponse\x12\x32\n\x07project\x18\x01 \x01(\x0b\x32!.qwak.projects.management.Project\"*\n\x14\x44\x65leteProjectRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\"%\n\x15\x44\x65leteProjectResponse\x12\x0c\n\x04info\x18\x01 \x01(\t\"{\n\x14UpdateProjectRequest\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12O\n\x13project_spec_config\x18\x02 \x01(\x0b\x32\x32.qwak.projects.management.ProjectSpecConfiguration\"T\n\x15UpdateProjectResponse\x12;\n\x0cproject_spec\x18\x01 \x01(\x0b\x32%.qwak.projects.management.ProjectSpec\"\xac\x01\n\x07Project\x12\x33\n\x04spec\x18\x01 \x01(\x0b\x32%.qwak.projects.management.ProjectSpec\x12\x31\n\x06models\x18\x02 \x03(\x0b\x32\x1d.qwak.models.management.ModelB\x02\x18\x01\x12\x39\n\rmodels_briefs\x18\x03 \x03(\x0b\x32\".qwak.models.management.ModelBrief\"\xeb\x03\n\x0bProjectSpec\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x14\n\x0cproject_name\x18\x02 \x01(\t\x12\x1b\n\x13project_description\x18\x03 \x01(\t\x12\x44\n\x0eproject_status\x18\x04 \x01(\x0e\x32,.qwak.projects.management.ProjectSpec.Status\x12.\n\ncreated_at\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x12\n\ncreated_by\x18\x06 \x01(\t\x12\x34\n\x10last_modified_at\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x18\n\x10last_modified_by\x18\x08 \x01(\t\x12\x14\n\x0cmodels_count\x18\t \x01(\x05\x12\x15\n\rmodels_active\x18\n \x01(\x05\x12N\n\x0ejfrog_settings\x18\x0b \x01(\x0b\x32\x36.qwak.projects.management.ModelRepositoryJFrogSettings\">\n\x06Status\x12\x0b\n\x07INVALID\x10\x00\x12\x0c\n\x08\x44ISABLED\x10\x01\x12\r\n\tSUSPENDED\x10\x02\x12\n\n\x06\x41\x43TIVE\x10\x03\"Q\n\x18ProjectSpecConfiguration\x12\x18\n\x0cproject_name\x18\x01 \x01(\tB\x02\x18\x01\x12\x1b\n\x13project_description\x18\x02 \x01(\t\"\xb1\x02\n\x1cModelRepositoryJFrogSettings\x12S\n\x11pull_image_secret\x18\x01 \x01(\x0b\x32\x38.qwak.projects.management.ModelRepositoryPullImageSecret\x12[\n\x15jfrog_project_details\x18\x02 \x01(\x0b\x32<.qwak.projects.management.ModelRepositoryJFrogProjectDetails\x12_\n\x1ajfrog_repositories_details\x18\x03 \x01(\x0b\x32;.qwak.projects.management.ModelRepositoryArtifactoryDetails\"@\n\x1eModelRepositoryPullImageSecret\x12\x1e\n\x16pull_image_secret_name\x18\x01 \x01(\t\"?\n\"ModelRepositoryJFrogProjectDetails\x12\x19\n\x11jfrog_project_key\x18\x01 \x01(\t\"\xb7\x02\n!ModelRepositoryArtifactoryDetails\x12[\n\x1b\x61rtifact_repository_details\x18\x01 \x01(\x0b\x32\x36.qwak.projects.management.ArtifactoryRepositoryDetails\x12Z\n\x1a\x64\x61taset_repository_details\x18\x02 \x01(\x0b\x32\x36.qwak.projects.management.ArtifactoryRepositoryDetails\x12Y\n\x19\x64ocker_repository_details\x18\x03 \x01(\x0b\x32\x36.qwak.projects.management.ArtifactoryRepositoryDetails\"6\n\x1c\x41rtifactoryRepositoryDetails\x12\x16\n\x0erepository_key\x18\x01 \x01(\t2\xc9\x04\n\x19ProjectsManagementService\x12p\n\rCreateProject\x12..qwak.projects.management.CreateProjectRequest\x1a/.qwak.projects.management.CreateProjectResponse\x12m\n\x0cListProjects\x12-.qwak.projects.management.ListProjectsRequest\x1a..qwak.projects.management.ListProjectsResponse\x12g\n\nGetProject\x12+.qwak.projects.management.GetProjectRequest\x1a,.qwak.projects.management.GetProjectResponse\x12p\n\rDeleteProject\x12..qwak.projects.management.DeleteProjectRequest\x1a/.qwak.projects.management.DeleteProjectResponse\x12p\n\rUpdateProject\x12..qwak.projects.management.UpdateProjectRequest\x1a/.qwak.projects.management.UpdateProjectResponseB\'\n#com.qwak.ai.management.projects.apiP\x01\x62\x06proto3')
|
|
20
20
|
|
|
21
21
|
_globals = globals()
|
|
22
22
|
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
|
@@ -27,6 +27,8 @@ if _descriptor._USE_C_DESCRIPTORS == False:
|
|
|
27
27
|
DESCRIPTOR._serialized_options = b'\n#com.qwak.ai.management.projects.apiP\001'
|
|
28
28
|
_PROJECT.fields_by_name['models']._options = None
|
|
29
29
|
_PROJECT.fields_by_name['models']._serialized_options = b'\030\001'
|
|
30
|
+
_PROJECTSPECCONFIGURATION.fields_by_name['project_name']._options = None
|
|
31
|
+
_PROJECTSPECCONFIGURATION.fields_by_name['project_name']._serialized_options = b'\030\001'
|
|
30
32
|
_globals['_PROJECTREQUEST']._serialized_start=158
|
|
31
33
|
_globals['_PROJECTREQUEST']._serialized_end=587
|
|
32
34
|
_globals['_CREATEPROJECTREQUEST']._serialized_start=590
|
|
@@ -56,17 +58,17 @@ if _descriptor._USE_C_DESCRIPTORS == False:
|
|
|
56
58
|
_globals['_PROJECTSPEC_STATUS']._serialized_start=1958
|
|
57
59
|
_globals['_PROJECTSPEC_STATUS']._serialized_end=2020
|
|
58
60
|
_globals['_PROJECTSPECCONFIGURATION']._serialized_start=2022
|
|
59
|
-
_globals['_PROJECTSPECCONFIGURATION']._serialized_end=
|
|
60
|
-
_globals['_MODELREPOSITORYJFROGSETTINGS']._serialized_start=
|
|
61
|
-
_globals['_MODELREPOSITORYJFROGSETTINGS']._serialized_end=
|
|
62
|
-
_globals['_MODELREPOSITORYPULLIMAGESECRET']._serialized_start=
|
|
63
|
-
_globals['_MODELREPOSITORYPULLIMAGESECRET']._serialized_end=
|
|
64
|
-
_globals['_MODELREPOSITORYJFROGPROJECTDETAILS']._serialized_start=
|
|
65
|
-
_globals['_MODELREPOSITORYJFROGPROJECTDETAILS']._serialized_end=
|
|
66
|
-
_globals['_MODELREPOSITORYARTIFACTORYDETAILS']._serialized_start=
|
|
67
|
-
_globals['_MODELREPOSITORYARTIFACTORYDETAILS']._serialized_end=
|
|
68
|
-
_globals['_ARTIFACTORYREPOSITORYDETAILS']._serialized_start=
|
|
69
|
-
_globals['_ARTIFACTORYREPOSITORYDETAILS']._serialized_end=
|
|
70
|
-
_globals['_PROJECTSMANAGEMENTSERVICE']._serialized_start=
|
|
71
|
-
_globals['_PROJECTSMANAGEMENTSERVICE']._serialized_end=
|
|
61
|
+
_globals['_PROJECTSPECCONFIGURATION']._serialized_end=2103
|
|
62
|
+
_globals['_MODELREPOSITORYJFROGSETTINGS']._serialized_start=2106
|
|
63
|
+
_globals['_MODELREPOSITORYJFROGSETTINGS']._serialized_end=2411
|
|
64
|
+
_globals['_MODELREPOSITORYPULLIMAGESECRET']._serialized_start=2413
|
|
65
|
+
_globals['_MODELREPOSITORYPULLIMAGESECRET']._serialized_end=2477
|
|
66
|
+
_globals['_MODELREPOSITORYJFROGPROJECTDETAILS']._serialized_start=2479
|
|
67
|
+
_globals['_MODELREPOSITORYJFROGPROJECTDETAILS']._serialized_end=2542
|
|
68
|
+
_globals['_MODELREPOSITORYARTIFACTORYDETAILS']._serialized_start=2545
|
|
69
|
+
_globals['_MODELREPOSITORYARTIFACTORYDETAILS']._serialized_end=2856
|
|
70
|
+
_globals['_ARTIFACTORYREPOSITORYDETAILS']._serialized_start=2858
|
|
71
|
+
_globals['_ARTIFACTORYREPOSITORYDETAILS']._serialized_end=2912
|
|
72
|
+
_globals['_PROJECTSMANAGEMENTSERVICE']._serialized_start=2915
|
|
73
|
+
_globals['_PROJECTSMANAGEMENTSERVICE']._serialized_end=3500
|
|
72
74
|
# @@protoc_insertion_point(module_scope)
|
qwak/__init__.py
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
from _qwak_proto.qwak.execution.v1.backfill_pb2 import BackfillSpec
|
|
2
|
+
from _qwak_proto.qwak.execution.v1.streaming_aggregation_pb2 import (
|
|
3
|
+
StreamingAggregationBackfillIngestion,
|
|
4
|
+
)
|
|
2
5
|
from _qwak_proto.qwak.execution.v1.batch_pb2 import BatchIngestion
|
|
3
6
|
from _qwak_proto.qwak.execution.v1.execution_service_pb2 import (
|
|
4
7
|
GetExecutionEntryRequest,
|
|
@@ -9,6 +12,8 @@ from _qwak_proto.qwak.execution.v1.execution_service_pb2 import (
|
|
|
9
12
|
TriggerBackfillResponse,
|
|
10
13
|
TriggerBatchFeaturesetRequest,
|
|
11
14
|
TriggerBatchFeaturesetResponse,
|
|
15
|
+
TriggerStreamingAggregationBackfillRequest,
|
|
16
|
+
TriggerStreamingAggregationBackfillResponse,
|
|
12
17
|
)
|
|
13
18
|
from _qwak_proto.qwak.execution.v1.execution_service_pb2_grpc import (
|
|
14
19
|
FeatureStoreExecutionServiceStub,
|
|
@@ -29,6 +34,29 @@ class ExecutionManagementClient:
|
|
|
29
34
|
grpc_channel
|
|
30
35
|
)
|
|
31
36
|
|
|
37
|
+
def trigger_streaming_aggregation_backfill(
|
|
38
|
+
self, backfill_ingestion: StreamingAggregationBackfillIngestion
|
|
39
|
+
) -> TriggerStreamingAggregationBackfillResponse:
|
|
40
|
+
"""
|
|
41
|
+
Receives a configured streaming aggregation backfill proto and triggers a streaming aggregation
|
|
42
|
+
backfill against the execution manager
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
backfill_ingestion (StreamingAggregationBackfillIngestion): A protobuf message
|
|
46
|
+
containing the backfill specification details
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
TriggerStreamingAggregationBackfillResponse: response object from the execution manager
|
|
50
|
+
"""
|
|
51
|
+
try:
|
|
52
|
+
return self._feature_store_execution_service.TriggerStreamingAggregationBackfill(
|
|
53
|
+
TriggerStreamingAggregationBackfillRequest(backfill=backfill_ingestion)
|
|
54
|
+
)
|
|
55
|
+
except RpcError as e:
|
|
56
|
+
raise QwakException(
|
|
57
|
+
f"Failed to trigger streaming aggregation backfill job, error encountered {e}"
|
|
58
|
+
)
|
|
59
|
+
|
|
32
60
|
def trigger_batch_backfill(
|
|
33
61
|
self, batch_backfill_spec: BackfillSpec
|
|
34
62
|
) -> TriggerBackfillResponse:
|
qwak/exceptions/__init__.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from .quiet_error import QuietError
|
|
2
2
|
from .qwak_exception import QwakException
|
|
3
3
|
from .qwak_general_build_exception import QwakGeneralBuildException
|
|
4
|
+
from .qwak_grpc_address_exception import QwakGrpcAddressException
|
|
4
5
|
from .qwak_http_exception import QwakHTTPException
|
|
5
6
|
from .qwak_inference_exception import QwakInferenceException
|
|
6
7
|
from .qwak_load_configuration_exception import LoadConfigurationException
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
from urllib.parse import ParseResult
|
|
3
|
+
|
|
4
|
+
from qwak.exceptions import QwakException
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class QwakGrpcAddressException(QwakException):
|
|
8
|
+
def __init__(self, details: str, grpc_address: Union[str, ParseResult]):
|
|
9
|
+
self.message = f"Not a valid gRPC address: '{grpc_address}'. Details: {details}"
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import pathlib
|
|
2
|
+
|
|
3
|
+
from qwak.clients.feature_store.execution_management_client import (
|
|
4
|
+
ExecutionManagementClient,
|
|
5
|
+
)
|
|
6
|
+
from qwak.feature_store.feature_sets.streaming_backfill import StreamingBackfill
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class StreamingAggregationBackfill:
|
|
10
|
+
def __init__(
|
|
11
|
+
self,
|
|
12
|
+
streaming_backfill: StreamingBackfill,
|
|
13
|
+
source_definition_path: pathlib.Path,
|
|
14
|
+
):
|
|
15
|
+
"""
|
|
16
|
+
Initialize the streaming aggregation backfill executor.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
streaming_backfill (StreamingBackfill): Specification containing the
|
|
20
|
+
featureset name, time range, data sources, and transformation
|
|
21
|
+
source_definition_path (Path): Path to the Python file containing the backfill
|
|
22
|
+
definition. Required for locating UDF artifacts.
|
|
23
|
+
"""
|
|
24
|
+
self._streaming_backfill = streaming_backfill
|
|
25
|
+
self._source_definition_path = source_definition_path
|
|
26
|
+
|
|
27
|
+
def trigger(self) -> str:
|
|
28
|
+
"""
|
|
29
|
+
Triggers the streaming aggregation backfill execution.
|
|
30
|
+
|
|
31
|
+
Converts the backfill specification to proto format and sends it to
|
|
32
|
+
the execution manager to start the backfill job.
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
str: The execution ID for tracking the backfill job status
|
|
36
|
+
|
|
37
|
+
Raises:
|
|
38
|
+
QwakException: If the execution manager request fails
|
|
39
|
+
"""
|
|
40
|
+
backfill_proto = self._streaming_backfill._to_proto(
|
|
41
|
+
str(self._source_definition_path)
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
execution_client = ExecutionManagementClient()
|
|
45
|
+
response = execution_client.trigger_streaming_aggregation_backfill(
|
|
46
|
+
backfill_proto
|
|
47
|
+
)
|
|
48
|
+
return response.execution_id
|
|
@@ -2,7 +2,7 @@ import collections
|
|
|
2
2
|
import functools
|
|
3
3
|
import inspect
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
|
-
from datetime import datetime
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
6
|
from typing import TYPE_CHECKING, List, Optional, Tuple, Union
|
|
7
7
|
|
|
8
8
|
from _qwak_proto.qwak.feature_store.features.execution_pb2 import (
|
|
@@ -21,6 +21,7 @@ from _qwak_proto.qwak.feature_store.sources.streaming_pb2 import (
|
|
|
21
21
|
StreamingSource,
|
|
22
22
|
StreamingSource as ProtoStreamingSource,
|
|
23
23
|
)
|
|
24
|
+
from google.protobuf.timestamp_pb2 import Timestamp as ProtoTimestamp
|
|
24
25
|
from qwak.clients.feature_store import FeatureRegistryClient
|
|
25
26
|
from qwak.exceptions import QwakException
|
|
26
27
|
from qwak.feature_store._common.artifact_utils import ArtifactSpec, ArtifactsUploader
|
|
@@ -34,7 +35,7 @@ from qwak.feature_store.feature_sets.metadata import (
|
|
|
34
35
|
set_metadata_on_function,
|
|
35
36
|
)
|
|
36
37
|
from qwak.feature_store.feature_sets.streaming_backfill import (
|
|
37
|
-
|
|
38
|
+
BackfillDataSource,
|
|
38
39
|
StreamingBackfill,
|
|
39
40
|
)
|
|
40
41
|
from qwak.feature_store.feature_sets.transformations import (
|
|
@@ -75,6 +76,7 @@ def feature_set(
|
|
|
75
76
|
key: Optional[str] = None,
|
|
76
77
|
auxiliary_sinks: List[BaseSink] = [],
|
|
77
78
|
repository: Optional[str] = None,
|
|
79
|
+
backfill_max_timestamp: Optional[datetime] = None,
|
|
78
80
|
):
|
|
79
81
|
"""
|
|
80
82
|
Creates a streaming feature set for the specified entity using the given streaming data sources.
|
|
@@ -110,6 +112,11 @@ def feature_set(
|
|
|
110
112
|
"""
|
|
111
113
|
|
|
112
114
|
def decorator(function):
|
|
115
|
+
if isinstance(function, StreamingBackfill):
|
|
116
|
+
raise QwakException(
|
|
117
|
+
"Backfill can no longer be defined as a decorator on the feature set, it must be triggered after feature set creation."
|
|
118
|
+
)
|
|
119
|
+
|
|
113
120
|
user_transformation = function()
|
|
114
121
|
FeaturesetUtils.validate_base_featureset_decorator(
|
|
115
122
|
user_transformation=user_transformation, entity=entity, key=key
|
|
@@ -120,10 +127,6 @@ def feature_set(
|
|
|
120
127
|
offline_scheduling_policy=offline_scheduling_policy,
|
|
121
128
|
)
|
|
122
129
|
|
|
123
|
-
streaming_backfill: Optional[StreamingBackfill] = (
|
|
124
|
-
StreamingBackfill.get_streaming_backfill_from_function(function=function)
|
|
125
|
-
)
|
|
126
|
-
|
|
127
130
|
fs_name = name or function.__name__
|
|
128
131
|
streaming_feature_set = StreamingFeatureSet(
|
|
129
132
|
name=fs_name,
|
|
@@ -150,7 +153,7 @@ def feature_set(
|
|
|
150
153
|
online_cluster_template=getattr(
|
|
151
154
|
function, _ONLINE_CLUSTER_SPEC, ClusterTemplate.SMALL
|
|
152
155
|
),
|
|
153
|
-
|
|
156
|
+
backfill_max_timestamp=backfill_max_timestamp,
|
|
154
157
|
__instance_module_path__=inspect.stack()[1].filename,
|
|
155
158
|
auxiliary_sinks=auxiliary_sinks,
|
|
156
159
|
)
|
|
@@ -197,55 +200,68 @@ def execution_specification(
|
|
|
197
200
|
@typechecked
|
|
198
201
|
def backfill(
|
|
199
202
|
*,
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
203
|
+
feature_set_name: str,
|
|
204
|
+
start_date: Optional[datetime],
|
|
205
|
+
end_date: Optional[datetime],
|
|
206
|
+
data_sources: Union[List[str], List[BackfillDataSource]],
|
|
204
207
|
backfill_cluster_template: Optional[ClusterTemplate] = ClusterTemplate.SMALL,
|
|
205
208
|
):
|
|
206
209
|
"""
|
|
207
|
-
|
|
210
|
+
Triggers a backfill execution for an existing streaming featureset. Currently available for streaming
|
|
208
211
|
aggregation featuresets only.
|
|
209
212
|
|
|
210
|
-
:
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
data_sources=["users_registration_stream"],
|
|
223
|
-
timestamp_column_name="reg_date"
|
|
224
|
-
)
|
|
213
|
+
Args:
|
|
214
|
+
feature_set_name (str): Name of the FeatureSet to trigger a backfill for.
|
|
215
|
+
start_date (datetime): Backfill start date, on Streaming Aggregation Feature Sets,
|
|
216
|
+
needs to align with the FeatureSet tiles.
|
|
217
|
+
end_date (datetime): Backfill end date, on Streaming Aggregation Feature Sets,
|
|
218
|
+
needs to align with the FeatureSet tiles and be smaller than the Feature Set's backfill_max_timestamp.
|
|
219
|
+
data_sources (list[BackfillDataSource] | list[str]): A list of BackfillDataSource objects containing
|
|
220
|
+
batch source name and optional time range, or a list of batch source names (with no time range limits).
|
|
221
|
+
backfill_cluster_template (ClusterTemplate, optional): An optional cluster specification for the backfill job.
|
|
222
|
+
Defaults to SMALL.
|
|
223
|
+
|
|
224
|
+
Examples:
|
|
225
225
|
@streaming.backfill(
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
226
|
+
feature_set_name="user_streaming_agg_features",
|
|
227
|
+
start_date=datetime(2022,1,1,0,0,0),
|
|
228
|
+
end_date=datetime(2023,9,1,0,0,0),
|
|
229
|
+
data_sources=[BackfillDataSource(data_source_name="backfill_data_source",
|
|
230
|
+
start_datetime=datetime(2023,1,1,0,0,0),
|
|
231
|
+
end_datetime=datetime(2023,8,1,0,0,0))],
|
|
230
232
|
backfill_cluster_template=ClusterTemplate.SMALL
|
|
231
|
-
backfill_transformation=SparkSqlTransformation("SELECT user_id, reg_country, reg_date FROM backfill_data_source")
|
|
232
233
|
)
|
|
233
|
-
def
|
|
234
|
-
return SparkSqlTransformation("SELECT user_id, reg_country, reg_date FROM
|
|
234
|
+
def backfill_transformation():
|
|
235
|
+
return SparkSqlTransformation("SELECT user_id, reg_country, reg_date FROM backfill_data_source")
|
|
235
236
|
"""
|
|
236
237
|
|
|
237
238
|
def decorator(function):
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
239
|
+
if isinstance(function, StreamingFeatureSet):
|
|
240
|
+
raise QwakException(
|
|
241
|
+
"Backfill can no longer be defined as a decorator on the feature set, it must be triggered after feature set creation."
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
backfill_transformation: SparkSqlTransformation = function()
|
|
245
|
+
|
|
246
|
+
if not isinstance(backfill_transformation, SparkSqlTransformation):
|
|
247
|
+
raise QwakException(
|
|
248
|
+
"Backfill must defined on a method returning a SparkSqlTransformation"
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
streaming_backfill = StreamingBackfill(
|
|
252
|
+
featureset_name=feature_set_name,
|
|
253
|
+
start_datetime=start_date,
|
|
254
|
+
end_datetime=end_date,
|
|
255
|
+
data_sources=StreamingBackfill._get_normalized_backfill_sources_spec(
|
|
256
|
+
data_sources
|
|
257
|
+
),
|
|
258
|
+
transform=backfill_transformation,
|
|
259
|
+
cluster_template=backfill_cluster_template,
|
|
246
260
|
)
|
|
247
261
|
|
|
248
|
-
|
|
262
|
+
functools.update_wrapper(streaming_backfill, backfill_transformation)
|
|
263
|
+
|
|
264
|
+
return streaming_backfill
|
|
249
265
|
|
|
250
266
|
return decorator
|
|
251
267
|
|
|
@@ -316,7 +332,7 @@ class StreamingFeatureSet(BaseFeatureSet):
|
|
|
316
332
|
offline_cluster_template: Optional[ClusterTemplate] = None
|
|
317
333
|
online_cluster_template: Optional[ClusterTemplate] = None
|
|
318
334
|
metadata: Optional[Metadata] = None
|
|
319
|
-
|
|
335
|
+
backfill_max_timestamp: Optional[datetime] = None
|
|
320
336
|
auxiliary_sinks: List[BaseSink] = field(default_factory=lambda: [])
|
|
321
337
|
|
|
322
338
|
def __post_init__(self):
|
|
@@ -399,7 +415,6 @@ class StreamingFeatureSet(BaseFeatureSet):
|
|
|
399
415
|
proto_featureset_type = self._get_streaming_aggregation_featureset_proto(
|
|
400
416
|
artifact_url=artifact_url,
|
|
401
417
|
streaming_sources=data_sources,
|
|
402
|
-
feature_registry=feature_registry,
|
|
403
418
|
initial_tile_size=maybe_initial_tile_size,
|
|
404
419
|
)
|
|
405
420
|
|
|
@@ -453,10 +468,9 @@ class StreamingFeatureSet(BaseFeatureSet):
|
|
|
453
468
|
"Auxiliary Sinks Are not supported in Streaming Aggregation Feature Sets"
|
|
454
469
|
)
|
|
455
470
|
|
|
456
|
-
|
|
457
|
-
if self.backfill and not is_streaming_agg:
|
|
471
|
+
if self.backfill_max_timestamp and not is_streaming_agg:
|
|
458
472
|
raise QwakException(
|
|
459
|
-
"
|
|
473
|
+
"backfill_max_timestamp can only be set for Streaming Aggregation FeatureSet."
|
|
460
474
|
)
|
|
461
475
|
|
|
462
476
|
# Validate transformation is PySpark when multiple data sources are used
|
|
@@ -515,18 +529,29 @@ class StreamingFeatureSet(BaseFeatureSet):
|
|
|
515
529
|
)
|
|
516
530
|
raise QwakException(error_message_str)
|
|
517
531
|
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
532
|
+
if not self.backfill_max_timestamp:
|
|
533
|
+
raise QwakException(
|
|
534
|
+
"""
|
|
535
|
+
backfill_max_timestamp must be set for Streaming Aggregation FeatureSet.
|
|
536
|
+
Events earlier than this timestamp can only be processed by triggering backfill,
|
|
537
|
+
the Streaming job will not process events that are earlier than this timestamp.
|
|
538
|
+
"""
|
|
539
|
+
)
|
|
540
|
+
|
|
541
|
+
self._validate_streaming_aggregation_backfill_max_timestamp()
|
|
521
542
|
|
|
522
543
|
return initial_tile_size
|
|
523
544
|
|
|
524
|
-
def
|
|
545
|
+
def _validate_streaming_aggregation_backfill_max_timestamp(self):
|
|
525
546
|
initial_tile_size, _ = StreamingFeatureSet._get_default_slide_period(
|
|
526
547
|
self.transformation.windows
|
|
527
548
|
)
|
|
528
549
|
|
|
529
|
-
self.
|
|
550
|
+
if self.backfill_max_timestamp.timestamp() % initial_tile_size != 0:
|
|
551
|
+
raise QwakException(
|
|
552
|
+
f"Chosen backfill max timestamp is invalid,"
|
|
553
|
+
f" it has to be exactly dividable by slice size of {initial_tile_size} seconds."
|
|
554
|
+
)
|
|
530
555
|
|
|
531
556
|
@staticmethod
|
|
532
557
|
def _get_default_slide_period(
|
|
@@ -596,9 +621,12 @@ class StreamingFeatureSet(BaseFeatureSet):
|
|
|
596
621
|
self,
|
|
597
622
|
artifact_url: Optional[str],
|
|
598
623
|
streaming_sources: List[StreamingSource],
|
|
599
|
-
feature_registry: FeatureRegistryClient,
|
|
600
624
|
initial_tile_size: int,
|
|
601
625
|
) -> ProtoFeatureSetType:
|
|
626
|
+
backfill_max_timestamp = ProtoTimestamp()
|
|
627
|
+
backfill_max_timestamp.FromDatetime(
|
|
628
|
+
self.backfill_max_timestamp.astimezone(timezone.utc)
|
|
629
|
+
)
|
|
602
630
|
return ProtoFeatureSetType(
|
|
603
631
|
streaming_aggregation_feature_set=ProtoStreamingAggregationFeatureSet(
|
|
604
632
|
transformation=self.transformation._to_proto(
|
|
@@ -621,14 +649,7 @@ class StreamingFeatureSet(BaseFeatureSet):
|
|
|
621
649
|
allowed_late_arrival_seconds=60 * 10,
|
|
622
650
|
aggregations=self.transformation._get_aggregations_proto(),
|
|
623
651
|
),
|
|
624
|
-
backfill_spec=
|
|
625
|
-
|
|
626
|
-
feature_registry=feature_registry,
|
|
627
|
-
original_instance_module_path=self.__instance_module_path__,
|
|
628
|
-
featureset_name=self.name,
|
|
629
|
-
)
|
|
630
|
-
if self.backfill
|
|
631
|
-
else None
|
|
632
|
-
),
|
|
652
|
+
backfill_spec=None,
|
|
653
|
+
backfill_max_timestamp=backfill_max_timestamp,
|
|
633
654
|
)
|
|
634
655
|
)
|