qwak-core 0.4.246__py3-none-any.whl → 0.4.248__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _qwak_proto/qwak/service_discovery/service_discovery_location_pb2.py +65 -0
- _qwak_proto/qwak/service_discovery/service_discovery_location_pb2.pyi +73 -0
- _qwak_proto/qwak/service_discovery/service_discovery_location_pb2_grpc.py +4 -0
- _qwak_proto/qwak/service_discovery/service_discovery_location_service_pb2.py +49 -0
- _qwak_proto/qwak/service_discovery/service_discovery_location_service_pb2.pyi +41 -0
- _qwak_proto/qwak/service_discovery/service_discovery_location_service_pb2_grpc.py +231 -0
- qwak/__init__.py +1 -1
- qwak/clients/feature_store/offline_serving_client.py +29 -4
- qwak/clients/location_discovery/__init__.py +1 -0
- qwak/clients/location_discovery/client.py +73 -0
- qwak/feature_store/_common/functions.py +0 -19
- qwak/feature_store/offline/__init__.py +1 -2
- qwak/inner/model_loggers_utils.py +8 -20
- qwak/model_loggers/artifact_logger.py +7 -2
- qwak/model_loggers/data_logger.py +11 -6
- {qwak_core-0.4.246.dist-info → qwak_core-0.4.248.dist-info}/METADATA +1 -1
- {qwak_core-0.4.246.dist-info → qwak_core-0.4.248.dist-info}/RECORD +21 -17
- qwak_services_mock/mocks/location_discovery_service_api.py +104 -0
- qwak_services_mock/mocks/qwak_mocks.py +4 -0
- qwak_services_mock/services_mock.py +13 -0
- qwak/feature_store/_common/featureset_asterisk_handler.py +0 -115
- qwak/feature_store/offline/_query_engine.py +0 -32
- qwak/feature_store/offline/athena/__init__.py +0 -0
- qwak/feature_store/offline/athena/athena_query_engine.py +0 -153
- qwak/feature_store/offline/client.py +0 -718
- {qwak_core-0.4.246.dist-info → qwak_core-0.4.248.dist-info}/WHEEL +0 -0
@@ -1,153 +0,0 @@
|
|
1
|
-
import time
|
2
|
-
import uuid
|
3
|
-
|
4
|
-
import pandas as pd
|
5
|
-
from _qwak_proto.qwak.ecosystem.v0.ecosystem_runtime_service_pb2 import (
|
6
|
-
GetCloudCredentialsParameters,
|
7
|
-
GetCloudCredentialsRequest,
|
8
|
-
OfflineFeatureStoreClient,
|
9
|
-
PermissionSet,
|
10
|
-
)
|
11
|
-
from google.protobuf.duration_pb2 import Duration
|
12
|
-
from qwak.clients.administration.eco_system.client import EcosystemClient
|
13
|
-
from qwak.exceptions import QwakException
|
14
|
-
from qwak.feature_store.offline._query_engine import BaseQueryEngine
|
15
|
-
|
16
|
-
# Reconnect threshold, in seconds: AthenaQueryEngine._check_reconnection opens
# a new connection once the stored credential expiration time is less than
# this far ahead of time.time().
RECONNECT_THRESHOLD_SEC = 300
|
17
|
-
|
18
|
-
|
19
|
-
class AthenaQueryEngine(BaseQueryEngine):
    """Query engine that talks to Athena through a ``pyathena`` connection.

    On construction the engine resolves the environment's object storage
    bucket, picks a unique S3 staging prefix under it, opens a connection
    using temporary cloud credentials, and makes sure the database used for
    temporary join tables exists. Tables uploaded through ``upload_table``
    are tracked so that ``cleanup`` can drop them and delete the staging
    objects.
    """

    def __init__(self):
        """Resolve environment details, connect, and create the temp database."""
        eco_client = EcosystemClient()
        self.bucket, environment_id = self._get_env_details(eco_client)

        # A random suffix keeps each engine instance in its own staging folder.
        self.staging_folder_prefix = (
            f"{environment_id}/tmp/offline_fs/{uuid.uuid4()}"  # nosec B108
        )
        self.temp_join_table_base_folder = (
            f"s3://{self.bucket}/{self.staging_folder_prefix}"
        )

        self.conn, self.expiration_time = self._init_connection()
        self.cursor = self.conn.cursor()

        # Specs of every table uploaded so far; consumed by cleanup().
        self.join_table_specs = []

        self.join_tables_db_name = f"qwak_temp_data_{environment_id.replace('-', '_')}"
        self.cursor.execute(f"CREATE DATABASE IF NOT EXISTS {self.join_tables_db_name}")

    @staticmethod
    def _get_env_details(eco_client):
        """Return ``(object_storage_bucket, environment_id)`` for the environment.

        Both values are read from ``eco_client.get_environment_configuration()``.
        """
        environment_configuration = eco_client.get_environment_configuration()

        return (
            environment_configuration.configuration.object_storage_bucket,
            environment_configuration.id,
        )

    def _init_connection(self):
        """Open a new ``pyathena`` connection using temporary credentials.

        Returns:
            A ``(connection, expiration_seconds)`` tuple, where
            ``expiration_seconds`` is the ``seconds`` field of the
            credentials' ``expiration_time``. It is later compared against
            ``time.time()``, so it is presumably an epoch timestamp
            (NOTE(review): confirm against the credentials proto).

        Raises:
            QwakException: If ``pyathena`` is not installed, or if obtaining
                credentials or connecting fails for any other reason. In the
                latter case the original exception is attached as the cause.
        """
        try:
            # obtain credentials through STS
            eco_client = EcosystemClient()
            cloud_credentials_response = eco_client.get_cloud_credentials(
                request=GetCloudCredentialsRequest(
                    parameters=GetCloudCredentialsParameters(
                        duration=Duration(seconds=60 * 60, nanos=0),
                        permission_set=PermissionSet(
                            offline_feature_store_client=OfflineFeatureStoreClient()
                        ),
                    )
                )
            )

            aws_credentials = (
                cloud_credentials_response.cloud_credentials.aws_temporary_credentials
            )

            # pyathena is imported lazily so that a missing install surfaces
            # as a QwakException with installation instructions.
            try:
                from pyathena import connect
                from pyathena.pandas.cursor import PandasCursor
            except ImportError:
                raise QwakException(
                    """
                    Missing 'pyathena' dependency required for fetching data from the offline store.
                    Please pip install pyathena
                    """
                )

            conn = connect(
                s3_staging_dir=self.temp_join_table_base_folder,
                aws_access_key_id=aws_credentials.access_key_id,
                aws_secret_access_key=aws_credentials.secret_access_key,
                aws_session_token=aws_credentials.session_token,
                region_name=aws_credentials.region,
                cursor_class=PandasCursor,
            )

            return (
                conn,
                aws_credentials.expiration_time.seconds,
            )

        except QwakException:
            # Already a domain error (e.g. the missing-dependency message):
            # re-raise untouched so the traceback is preserved.
            raise

        except Exception as e:
            # Chain the original error so the root cause stays visible.
            raise QwakException(
                "Got an error trying to retrieve credentials to query the offline store "
                f"in the cloud, error is: {e}"
            ) from e

    def upload_table(self, df: pd.DataFrame):
        """Upload ``df`` as a temporary join table and return its full path.

        The table spec is recorded before the upload starts so that
        ``cleanup`` also covers an upload that fails part-way.

        Args:
            df: The data frame to write to the temporary join table.

        Returns:
            ``join_table_full_path`` of the newly created table spec.
        """
        # Refresh the connection first, as the other operations do, so the
        # upload does not run with credentials that are about to expire.
        self._check_reconnection()

        join_table_spec = super().JoinTableSpec(
            self.join_tables_db_name, AthenaQueryEngine.get_quotes()
        )
        self.join_table_specs.append(join_table_spec)

        from pyathena.pandas.util import to_sql

        to_sql(
            df,
            join_table_spec.table_name,
            self.conn,
            f"{self.temp_join_table_base_folder}/{join_table_spec.table_name}/",
            schema=self.join_tables_db_name,
            index=False,
            if_exists="replace",
        )

        return join_table_spec.join_table_full_path

    def run_query(self, query: str):
        """Execute ``query`` on the cursor and return ``fetchall()`` of the result."""
        self._check_reconnection()
        return self.cursor.execute(query).fetchall()

    def read_pandas_from_query(self, query: str, parse_dates=None):
        """Run ``query`` through ``pd.read_sql`` on the current connection.

        Args:
            query: The SQL text to execute.
            parse_dates: Forwarded unchanged to ``pd.read_sql``.

        Returns:
            Whatever ``pd.read_sql`` returns for the query.
        """
        self._check_reconnection()
        return pd.read_sql(
            query,
            self.conn,
            parse_dates=parse_dates,
        )

    def _check_reconnection(self):
        """Reconnect when the credentials are close to their expiration time.

        A new connection and cursor replace the current ones once fewer than
        ``RECONNECT_THRESHOLD_SEC`` seconds remain.
        """
        if self.expiration_time - time.time() < RECONNECT_THRESHOLD_SEC:
            self.conn, self.expiration_time = self._init_connection()
            self.cursor = self.conn.cursor()

    def cleanup(self):
        """Drop every uploaded join table and delete the S3 staging objects."""
        self._check_reconnection()
        for join_table_spec in self.join_table_specs:
            # The drop statement uses backticks in place of the double quotes
            # found in the stored path.
            table_path = join_table_spec.join_table_full_path.replace('"', "`")
            # IF EXISTS: a spec is recorded before its upload runs, so the
            # table may never have been created; a missing table must not
            # abort the remaining drops or the staging cleanup below.
            self.cursor.execute(f"DROP TABLE IF EXISTS {table_path}")

        self.join_table_specs = []

        s3 = self.conn.session.resource("s3")
        bucket = s3.Bucket(self.bucket)
        bucket.objects.filter(Prefix=self.staging_folder_prefix).delete()

    @staticmethod
    def get_quotes():
        """Return the quote character handed to ``JoinTableSpec``: a double quote."""
        return '"'
|