apache-airflow-providers-google 11.0.0rc1__py3-none-any.whl → 12.0.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/assets/gcs.py +1 -7
- airflow/providers/google/cloud/hooks/alloy_db.py +289 -0
- airflow/providers/google/cloud/hooks/cloud_batch.py +13 -5
- airflow/providers/google/cloud/hooks/dataproc.py +7 -3
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +41 -22
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +7 -38
- airflow/providers/google/cloud/hooks/translate.py +355 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +147 -0
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +10 -0
- airflow/providers/google/cloud/links/alloy_db.py +55 -0
- airflow/providers/google/cloud/links/translate.py +98 -0
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +1 -5
- airflow/providers/google/cloud/openlineage/mixins.py +4 -12
- airflow/providers/google/cloud/openlineage/utils.py +200 -22
- airflow/providers/google/cloud/operators/alloy_db.py +459 -0
- airflow/providers/google/cloud/operators/automl.py +55 -44
- airflow/providers/google/cloud/operators/bigquery.py +60 -15
- airflow/providers/google/cloud/operators/dataproc.py +12 -0
- airflow/providers/google/cloud/operators/gcs.py +5 -14
- airflow/providers/google/cloud/operators/kubernetes_engine.py +377 -705
- airflow/providers/google/cloud/operators/mlengine.py +41 -31
- airflow/providers/google/cloud/operators/translate.py +586 -1
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +163 -0
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +5 -0
- airflow/providers/google/cloud/sensors/dataproc.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/__init__.py +16 -0
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +112 -0
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +6 -11
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +3 -0
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +3 -0
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -10
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +3 -15
- airflow/providers/google/cloud/transfers/gcs_to_local.py +9 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +41 -6
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +15 -0
- airflow/providers/google/get_provider_info.py +30 -18
- airflow/providers/google/version_compat.py +36 -0
- {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0.dist-info}/METADATA +20 -22
- {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0.dist-info}/RECORD +42 -37
- airflow/providers/google/cloud/hooks/datapipeline.py +0 -71
- airflow/providers/google/cloud/openlineage/BigQueryErrorRunFacet.json +0 -30
- airflow/providers/google/cloud/operators/datapipeline.py +0 -63
- {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-11.0.0rc1.dist-info → apache_airflow_providers_google-12.0.0.dist-info}/entry_points.txt +0 -0
@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "11.0.0"
+__version__ = "12.0.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
-    "2.8.0"
+    "2.9.0"
 ):
     raise RuntimeError(
-        f"The package `apache-airflow-providers-google:{__version__}` needs Apache Airflow 2.8.0+"
+        f"The package `apache-airflow-providers-google:{__version__}` needs Apache Airflow 2.9.0+"
     )
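The guard above parses `airflow_version` twice so that pre-release builds still satisfy the minimum-version check: `base_version` drops any rc/dev suffix before the comparison. A quick illustration with the `packaging` library:

    from packaging import version

    v = version.parse("2.9.0rc1")
    print(v.base_version)                                            # "2.9.0"
    print(v >= version.parse("2.9.0"))                               # False: an rc sorts before the release
    print(version.parse(v.base_version) >= version.parse("2.9.0"))  # True: suffix stripped first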
@@ -19,6 +19,7 @@ from __future__ import annotations
 from typing import TYPE_CHECKING
 
 from airflow.providers.google.cloud.hooks.gcs import _parse_gcs_url
+from airflow.providers.google.version_compat import AIRFLOW_V_3_0_PLUS
 
 if TYPE_CHECKING:
     from urllib.parse import SplitResult
@@ -26,16 +27,9 @@ if TYPE_CHECKING:
     from airflow.providers.common.compat.assets import Asset
     from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset
 else:
-    # TODO: Remove this try-exception block after bumping common provider to 1.3.0
-    # This is due to common provider AssetDetails import error handling
     try:
         from airflow.providers.common.compat.assets import Asset
     except ImportError:
-        from packaging.version import Version
-
-        from airflow import __version__ as AIRFLOW_VERSION
-
-        AIRFLOW_V_3_0_PLUS = Version(Version(AIRFLOW_VERSION).base_version) >= Version("3.0.0")
         if AIRFLOW_V_3_0_PLUS:
             from airflow.sdk.definitions.asset import Asset
         else:
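The inline fallback removed here moves into the new `airflow/providers/google/version_compat.py` module (added in this release, +36 lines). Its exact contents are not shown in this diff, but a minimal sketch consistent with the deleted logic would be:

    # Sketch only: mirrors the inline check removed above; the real
    # version_compat.py body is not part of this diff.
    from packaging.version import Version

    from airflow import __version__ as AIRFLOW_VERSION

    # Compare on base_version so pre-release builds of Airflow 3 count as 3.0+.
    AIRFLOW_V_3_0_PLUS = Version(Version(AIRFLOW_VERSION).base_version) >= Version("3.0.0")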
@@ -0,0 +1,289 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Module contains a Google Alloy DB Hook."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from copy import deepcopy
+from typing import TYPE_CHECKING
+
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.cloud import alloydb_v1
+
+from airflow.exceptions import AirflowException
+from airflow.providers.google.common.consts import CLIENT_INFO
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
+
+if TYPE_CHECKING:
+    import proto
+    from google.api_core.operation import Operation
+    from google.api_core.retry import Retry
+    from google.protobuf.field_mask_pb2 import FieldMask
+
+
+class AlloyDbHook(GoogleBaseHook):
+    """Google Alloy DB Hook."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._client: alloydb_v1.AlloyDBAdminClient | None = None
+
+    def get_alloy_db_admin_client(self) -> alloydb_v1.AlloyDBAdminClient:
+        """Retrieve AlloyDB client."""
+        if not self._client:
+            self._client = alloydb_v1.AlloyDBAdminClient(
+                credentials=self.get_credentials(), client_info=CLIENT_INFO
+            )
+        return self._client
+
+    def wait_for_operation(self, timeout: float | None, operation: Operation) -> proto.Message:
+        """Wait for long-lasting operation to complete."""
+        self.log.info("Waiting for operation to complete...")
+        _timeout: int | None = int(timeout) if timeout else None
+        try:
+            return operation.result(timeout=_timeout)
+        except Exception:
+            error = operation.exception(timeout=_timeout)
+            raise AirflowException(error)
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def create_cluster(
+        self,
+        cluster_id: str,
+        cluster: alloydb_v1.Cluster | dict,
+        location: str,
+        project_id: str = PROVIDE_PROJECT_ID,
+        request_id: str | None = None,
+        validate_only: bool = False,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Operation:
+        """
+        Create an Alloy DB cluster.
+
+        .. seealso::
+            For more details see API documentation:
+            https://cloud.google.com/python/docs/reference/alloydb/latest/google.cloud.alloydb_v1.types.CreateClusterRequest
+
+        :param cluster_id: Required. ID of the cluster to create.
+        :param cluster: Required. Cluster to create. For more details please see API documentation:
+            https://cloud.google.com/python/docs/reference/alloydb/latest/google.cloud.alloydb_v1.types.Cluster
+        :param location: Required. The ID of the Google Cloud region where the cluster is located.
+        :param project_id: Optional. The ID of the Google Cloud project where the cluster is located.
+        :param request_id: Optional. The ID of an existing request object.
+        :param validate_only: Optional. If set, performs request validation, but does not actually execute
+            the create request.
+        :param retry: Optional. Designation of what errors, if any, should be retried.
+        :param timeout: Optional. The timeout for this request.
+        :param metadata: Optional. Strings which should be sent along with the request as metadata.
+        """
+        client = self.get_alloy_db_admin_client()
+        return client.create_cluster(
+            request={
+                "parent": client.common_location_path(project_id, location),
+                "cluster_id": cluster_id,
+                "cluster": cluster,
+                "request_id": request_id,
+                "validate_only": validate_only,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def create_secondary_cluster(
+        self,
+        cluster_id: str,
+        cluster: alloydb_v1.Cluster | dict,
+        location: str,
+        project_id: str = PROVIDE_PROJECT_ID,
+        request_id: str | None = None,
+        validate_only: bool = False,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Operation:
+        """
+        Create a secondary Alloy DB cluster.
+
+        .. seealso::
+            For more details see API documentation:
+            https://cloud.google.com/python/docs/reference/alloydb/latest/google.cloud.alloydb_v1.types.CreateClusterRequest
+
+        :param cluster_id: Required. ID of the cluster to create.
+        :param cluster: Required. Cluster to create. For more details please see API documentation:
+            https://cloud.google.com/python/docs/reference/alloydb/latest/google.cloud.alloydb_v1.types.Cluster
+        :param location: Required. The ID of the Google Cloud region where the cluster is located.
+        :param project_id: Optional. The ID of the Google Cloud project where the cluster is located.
+        :param request_id: Optional. The ID of an existing request object.
+        :param validate_only: Optional. If set, performs request validation, but does not actually execute
+            the create request.
+        :param retry: Optional. Designation of what errors, if any, should be retried.
+        :param timeout: Optional. The timeout for this request.
+        :param metadata: Optional. Strings which should be sent along with the request as metadata.
+        """
+        client = self.get_alloy_db_admin_client()
+        return client.create_secondary_cluster(
+            request={
+                "parent": client.common_location_path(project_id, location),
+                "cluster_id": cluster_id,
+                "cluster": cluster,
+                "request_id": request_id,
+                "validate_only": validate_only,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def get_cluster(
+        self,
+        cluster_id: str,
+        location: str,
+        project_id: str = PROVIDE_PROJECT_ID,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> alloydb_v1.Cluster:
+        """
+        Retrieve an Alloy DB cluster.
+
+        .. seealso::
+            For more details see API documentation:
+            https://cloud.google.com/python/docs/reference/alloydb/latest/google.cloud.alloydb_v1.types.GetClusterRequest
+
+        :param cluster_id: Required. ID of the cluster to create.
+        :param location: Required. The ID of the Google Cloud region where the cluster is located.
+        :param project_id: Optional. The ID of the Google Cloud project where the cluster is located.
+        :param retry: Optional. Designation of what errors, if any, should be retried.
+        :param timeout: Optional. The timeout for this request.
+        :param metadata: Optional. Strings which should be sent along with the request as metadata.
+        """
+        client = self.get_alloy_db_admin_client()
+        return client.get_cluster(
+            request={"name": client.cluster_path(project_id, location, cluster_id)},
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def update_cluster(
+        self,
+        cluster_id: str,
+        cluster: alloydb_v1.Cluster | dict,
+        location: str,
+        update_mask: FieldMask | dict | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
+        allow_missing: bool = False,
+        request_id: str | None = None,
+        validate_only: bool = False,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Operation:
+        """
+        Update an Alloy DB cluster.
+
+        .. seealso::
+            For more details see API documentation:
+            https://cloud.google.com/python/docs/reference/alloydb/latest/google.cloud.alloydb_v1.types.UpdateClusterRequest
+
+        :param cluster_id: Required. ID of the cluster to update.
+        :param cluster: Required. Cluster to create. For more details please see API documentation:
+            https://cloud.google.com/python/docs/reference/alloydb/latest/google.cloud.alloydb_v1.types.Cluster
+        :param location: Required. The ID of the Google Cloud region where the cluster is located.
+        :param update_mask: Optional. Field mask is used to specify the fields to be overwritten in the
+            Cluster resource by the update.
+        :param request_id: Optional. The ID of an existing request object.
+        :param validate_only: Optional. If set, performs request validation, but does not actually execute
+            the create request.
+        :param project_id: Optional. The ID of the Google Cloud project where the cluster is located.
+        :param allow_missing: Optional. If set to true, update succeeds even if cluster is not found.
+            In that case, a new cluster is created and update_mask is ignored.
+        :param retry: Optional. Designation of what errors, if any, should be retried.
+        :param timeout: Optional. The timeout for this request.
+        :param metadata: Optional. Strings which should be sent along with the request as metadata.
+        """
+        client = self.get_alloy_db_admin_client()
+        _cluster = deepcopy(cluster) if isinstance(cluster, dict) else alloydb_v1.Cluster.to_dict(cluster)
+        _cluster["name"] = client.cluster_path(project_id, location, cluster_id)
+        return client.update_cluster(
+            request={
+                "update_mask": update_mask,
+                "cluster": _cluster,
+                "request_id": request_id,
+                "validate_only": validate_only,
+                "allow_missing": allow_missing,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+    @GoogleBaseHook.fallback_to_default_project_id
+    def delete_cluster(
+        self,
+        cluster_id: str,
+        location: str,
+        project_id: str = PROVIDE_PROJECT_ID,
+        request_id: str | None = None,
+        etag: str | None = None,
+        validate_only: bool = False,
+        force: bool = False,
+        retry: Retry | _MethodDefault = DEFAULT,
+        timeout: float | None = None,
+        metadata: Sequence[tuple[str, str]] = (),
+    ) -> Operation:
+        """
+        Delete an Alloy DB cluster.
+
+        .. seealso::
+            For more details see API documentation:
+            https://cloud.google.com/python/docs/reference/alloydb/latest/google.cloud.alloydb_v1.types.DeleteClusterRequest
+
+        :param cluster_id: Required. ID of the cluster to delete.
+        :param location: Required. The ID of the Google Cloud region where the cluster is located.
+        :param project_id: Optional. The ID of the Google Cloud project where the cluster is located.
+        :param request_id: Optional. The ID of an existing request object.
+        :param etag: Optional. The current etag of the Cluster. If an etag is provided and does not match the
+            current etag of the Cluster, deletion will be blocked and an ABORTED error will be returned.
+        :param validate_only: Optional. If set, performs request validation, but does not actually execute
+            the create request.
+        :param force: Optional. Whether to cascade delete child instances for given cluster.
+        :param retry: Optional. Designation of what errors, if any, should be retried.
+        :param timeout: Optional. The timeout for this request.
+        :param metadata: Optional. Strings which should be sent along with the request as metadata.
+        """
+        client = self.get_alloy_db_admin_client()
+        return client.delete_cluster(
+            request={
+                "name": client.cluster_path(project_id, location, cluster_id),
+                "request_id": request_id,
+                "etag": etag,
+                "validate_only": validate_only,
+                "force": force,
+            },
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
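A minimal usage sketch of the new hook; it assumes a configured `google_cloud_default` connection, and the project, region, and cluster values are placeholders:

    from airflow.providers.google.cloud.hooks.alloy_db import AlloyDbHook

    hook = AlloyDbHook(gcp_conn_id="google_cloud_default")

    # validate_only performs request validation without creating anything.
    operation = hook.create_cluster(
        cluster_id="example-cluster",  # placeholder
        cluster={"network": "projects/example-project/global/networks/default"},  # placeholder spec
        location="us-central1",        # placeholder region
        project_id="example-project",  # placeholder; falls back to the connection's project if omitted
        validate_only=True,
    )
    result = hook.wait_for_operation(timeout=300, operation=operation)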
@@ -145,12 +145,20 @@ class CloudBatchHook(GoogleBaseHook):
             try:
                 job = client.get_job(name=f"{job_name}")
                 status: JobStatus.State = job.status.state
-                if (
-                    status == JobStatus.State.SUCCEEDED
-                    or status == JobStatus.State.FAILED
-                    or status == JobStatus.State.DELETION_IN_PROGRESS
-                ):
+                if status == JobStatus.State.SUCCEEDED:
                     return job
+                elif status == JobStatus.State.FAILED:
+                    message = (
+                        "Unexpected error in the operation: "
+                        f"Batch job with name {job_name} has failed its execution."
+                    )
+                    raise AirflowException(message)
+                elif status == JobStatus.State.DELETION_IN_PROGRESS:
+                    message = (
+                        "Unexpected error in the operation: "
+                        f"Batch job with name {job_name} is being deleted."
+                    )
+                    raise AirflowException(message)
                 else:
                     time.sleep(polling_period_seconds)
             except Exception as e:
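The polling loop (in `CloudBatchHook.wait_for_job`) now raises on the FAILED and DELETION_IN_PROGRESS terminal states instead of returning the job, so callers should catch `AirflowException` rather than inspect the returned status. A sketch with a placeholder job name:

    from airflow.exceptions import AirflowException
    from airflow.providers.google.cloud.hooks.cloud_batch import CloudBatchHook

    hook = CloudBatchHook()
    try:
        # Previously this returned the job even for FAILED / DELETION_IN_PROGRESS.
        job = hook.wait_for_job("projects/example-project/locations/us-central1/jobs/example-job")
    except AirflowException as err:
        print(f"Batch job did not succeed: {err}")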
@@ -116,13 +116,17 @@ class DataProcJobBuilder:
         if args is not None:
             self.job["job"][self.job_type]["args"] = args
 
-    def add_query(self, query: str) -> None:
+    def add_query(self, query: str | list[str]) -> None:
         """
-        Set query for Dataproc job.
+        Add query for Dataproc job.
 
         :param query: query for the job.
         """
-        self.job["job"][self.job_type]["query_list"] = {"queries": [query]}
+        queries = self.job["job"][self.job_type].setdefault("query_list", {"queries": []})["queries"]
+        if isinstance(query, str):
+            queries.append(query)
+        elif isinstance(query, list):
+            queries.extend(query)
 
     def add_query_uri(self, query_uri: str) -> None:
         """
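`add_query` now accepts a single string or a list of strings, and repeated calls accumulate into one `query_list` instead of overwriting it. A short sketch (the constructor arguments are illustrative placeholders):

    from airflow.providers.google.cloud.hooks.dataproc import DataProcJobBuilder

    builder = DataProcJobBuilder(
        project_id="example-project",  # placeholder
        task_id="example_task",
        cluster_name="example-cluster",
        job_type="hive_job",
    )
    builder.add_query("SELECT 1")                # a string is appended
    builder.add_query(["SELECT 2", "SELECT 3"])  # a list extends the same query_list
    # builder.job["job"]["hive_job"]["query_list"] now holds all three queries.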
@@ -60,11 +60,16 @@ class DataprocMetastoreHook(GoogleBaseHook):
 
     def wait_for_operation(self, timeout: float | None, operation: Operation):
         """Wait for long-lasting operation to complete."""
+        self.log.info("Waiting for operation (timeout: %s seconds)", timeout)
+
         try:
-            return operation.result(timeout=timeout)
-        except Exception:
+            result = operation.result(timeout=timeout)
+            self.log.info("Operation completed successfully")
+            return result
+        except Exception as e:
+            self.log.error("Operation failed: %s", str(e))
             error = operation.exception(timeout=timeout)
-            raise AirflowException(error)
+            raise AirflowException(f"Operation failed: {error}")
 
     @GoogleBaseHook.fallback_to_default_project_id
     def create_backup(
@@ -669,23 +674,37 @@ class DataprocMetastoreHook(GoogleBaseHook):
         # because dictionaries are ordered since Python 3.7+
         _partitions = list(dict.fromkeys(partition_names)) if partition_names else []
 
-        query = f"""
-            SELECT *
-            FROM PARTITIONS
-            INNER JOIN TBLS
-            ON PARTITIONS.TBL_ID = TBLS.TBL_ID
-            WHERE
-                TBLS.TBL_NAME = '{table}'"""
         if _partitions:
-            query += f"""
-                AND PARTITIONS.PART_NAME IN ({", ".join(f"'{p}'" for p in _partitions)})"""
-        query += ";"
-
-        client = self.get_dataproc_metastore_client_v1beta()
-        result = client.query_metadata(
-            request={
-                "service": f"projects/{project_id}/locations/{region}/services/{service_id}",
-                "query": query,
-            }
-        )
-        return result
+            partition_list = ", ".join(f"'{p}'" for p in _partitions)
+            query = f"""
+                SELECT PARTITIONS.*, TBLS.TBL_TYPE, TBLS.TBL_NAME
+                FROM PARTITIONS
+                INNER JOIN TBLS ON PARTITIONS.TBL_ID = TBLS.TBL_ID
+                WHERE TBLS.TBL_NAME = '{table}'
+                AND PARTITIONS.PART_NAME IN ({partition_list});"""
+        else:
+            query = f"""
+                SELECT PARTITIONS.*, TBLS.TBL_TYPE, TBLS.TBL_NAME
+                FROM PARTITIONS
+                INNER JOIN TBLS ON PARTITIONS.TBL_ID = TBLS.TBL_ID
+                WHERE TBLS.TBL_NAME = '{table}';"""
+
+        request = {
+            "service": f"projects/{project_id}/locations/{region}/services/{service_id}",
+            "query": query,
+        }
+
+        self.log.info("Prepared request:")
+        self.log.info(request)
+
+        # Execute query
+        try:
+            self.log.info("Getting Dataproc Metastore client (v1beta)...")
+            client = self.get_dataproc_metastore_client_v1beta()
+            self.log.info("Executing query_metadata...")
+            result = client.query_metadata(request=request)
+            self.log.info("Query executed successfully")
+            return result
+        except Exception as e:
+            self.log.error("Error executing query_metadata: %s", str(e))
+            raise
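For illustration, with placeholder values the partition branch above produces the following Hive metastore query:

    table = "sales"                                   # placeholder table name
    _partitions = ["ds=2024-01-01", "ds=2024-01-02"]  # placeholder partition names
    partition_list = ", ".join(f"'{p}'" for p in _partitions)
    # partition_list == "'ds=2024-01-01', 'ds=2024-01-02'"
    query = f"""
        SELECT PARTITIONS.*, TBLS.TBL_TYPE, TBLS.TBL_NAME
        FROM PARTITIONS
        INNER JOIN TBLS ON PARTITIONS.TBL_ID = TBLS.TBL_ID
        WHERE TBLS.TBL_NAME = '{table}'
        AND PARTITIONS.PART_NAME IN ({partition_list});"""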
@@ -23,7 +23,7 @@ import contextlib
 import json
 import time
 from collections.abc import Sequence
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 from google.api_core.exceptions import NotFound
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
@@ -33,8 +33,7 @@ from google.auth.transport import requests as google_requests
 from google.cloud import exceptions  # type: ignore[attr-defined]
 from google.cloud.container_v1 import ClusterManagerAsyncClient, ClusterManagerClient
 from google.cloud.container_v1.types import Cluster, Operation
-from kubernetes import client, utils
-from kubernetes.client.models import V1Deployment
+from kubernetes import client
 from kubernetes_asyncio import client as async_client
 from kubernetes_asyncio.config.kube_config import FileOrData
 
@@ -434,38 +433,9 @@ class GKEKubernetesHook(GoogleBaseHook, KubernetesHook):
             enable_tcp_keepalive=self.enable_tcp_keepalive,
         ).get_conn()
 
-    def check_kueue_deployment_running(self, name, namespace):
-        timeout = 300
-        polling_period_seconds = 2
-
-        while timeout is None or timeout > 0:
-            try:
-                deployment = self.get_deployment_status(name=name, namespace=namespace)
-                deployment_status = V1Deployment.to_dict(deployment)["status"]
-                replicas = deployment_status["replicas"]
-                ready_replicas = deployment_status["ready_replicas"]
-                unavailable_replicas = deployment_status["unavailable_replicas"]
-                if (
-                    replicas is not None
-                    and ready_replicas is not None
-                    and unavailable_replicas is None
-                    and replicas == ready_replicas
-                ):
-                    return
-                else:
-                    self.log.info("Waiting until Deployment will be ready...")
-                    time.sleep(polling_period_seconds)
-            except Exception as e:
-                self.log.exception("Exception occurred while checking for Deployment status.")
-                raise e
-
-            if timeout is not None:
-                timeout -= polling_period_seconds
-
-        raise AirflowException("Deployment timed out")
-
     def apply_from_yaml_file(
         self,
+        api_client: Any = None,
         yaml_file: str | None = None,
         yaml_objects: list[dict] | None = None,
         verbose: bool = False,
@@ -474,18 +444,17 @@ class GKEKubernetesHook(GoogleBaseHook, KubernetesHook):
         """
         Perform an action from a yaml file.
 
+        :param api_client: A Kubernetes client application.
         :param yaml_file: Contains the path to yaml file.
         :param yaml_objects: List of YAML objects; used instead of reading the yaml_file.
         :param verbose: If True, print confirmation from create action. Default is False.
         :param namespace: Contains the namespace to create all resources inside. The namespace must
             preexist otherwise the resource creation will fail.
         """
-        k8s_client = self.get_conn()
-
-        utils.create_from_yaml(
-            k8s_client=k8s_client,
-            yaml_objects=yaml_objects,
+        super().apply_from_yaml_file(
+            api_client=api_client or self.get_conn(),
             yaml_file=yaml_file,
+            yaml_objects=yaml_objects,
             verbose=verbose,
             namespace=namespace,
        )
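`apply_from_yaml_file` now delegates to the base `KubernetesHook` implementation and takes an optional `api_client`, falling back to `self.get_conn()`. A minimal sketch; the connection settings shown are assumptions about a typical `GKEKubernetesHook` setup, not values from this diff:

    from airflow.providers.google.cloud.hooks.kubernetes_engine import GKEKubernetesHook

    hook = GKEKubernetesHook(
        gcp_conn_id="google_cloud_default",  # placeholder connection
        cluster_url="https://10.0.0.1",      # placeholder GKE control-plane endpoint
        ssl_ca_cert="<base64 CA cert>",      # placeholder
    )
    # api_client defaults to hook.get_conn() when omitted:
    hook.apply_from_yaml_file(yaml_file="/path/to/manifest.yaml", namespace="default")
    # Or pass an existing client explicitly to reuse it across calls:
    hook.apply_from_yaml_file(api_client=hook.get_conn(), yaml_file="/path/to/manifest.yaml", namespace="default")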