apache-airflow-providers-google 10.7.0__py3-none-any.whl → 10.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +10 -6
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +4 -1
- airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +31 -34
- airflow/providers/google/cloud/hooks/automl.py +11 -9
- airflow/providers/google/cloud/hooks/bigquery.py +30 -36
- airflow/providers/google/cloud/hooks/bigquery_dts.py +5 -3
- airflow/providers/google/cloud/hooks/bigtable.py +11 -8
- airflow/providers/google/cloud/hooks/cloud_batch.py +5 -3
- airflow/providers/google/cloud/hooks/cloud_build.py +6 -4
- airflow/providers/google/cloud/hooks/cloud_composer.py +14 -10
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +5 -3
- airflow/providers/google/cloud/hooks/cloud_run.py +5 -3
- airflow/providers/google/cloud/hooks/cloud_sql.py +11 -14
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +8 -6
- airflow/providers/google/cloud/hooks/compute.py +5 -3
- airflow/providers/google/cloud/hooks/compute_ssh.py +1 -1
- airflow/providers/google/cloud/hooks/datacatalog.py +5 -3
- airflow/providers/google/cloud/hooks/dataflow.py +8 -11
- airflow/providers/google/cloud/hooks/dataform.py +4 -2
- airflow/providers/google/cloud/hooks/datafusion.py +24 -6
- airflow/providers/google/cloud/hooks/dataplex.py +75 -6
- airflow/providers/google/cloud/hooks/dataproc.py +9 -7
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +8 -6
- airflow/providers/google/cloud/hooks/dlp.py +139 -137
- airflow/providers/google/cloud/hooks/gcs.py +15 -20
- airflow/providers/google/cloud/hooks/kms.py +4 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +34 -34
- airflow/providers/google/cloud/hooks/looker.py +4 -1
- airflow/providers/google/cloud/hooks/mlengine.py +8 -6
- airflow/providers/google/cloud/hooks/natural_language.py +4 -2
- airflow/providers/google/cloud/hooks/os_login.py +9 -7
- airflow/providers/google/cloud/hooks/pubsub.py +13 -11
- airflow/providers/google/cloud/hooks/spanner.py +7 -5
- airflow/providers/google/cloud/hooks/speech_to_text.py +4 -2
- airflow/providers/google/cloud/hooks/stackdriver.py +6 -5
- airflow/providers/google/cloud/hooks/tasks.py +5 -3
- airflow/providers/google/cloud/hooks/text_to_speech.py +4 -2
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +7 -5
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +6 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +11 -9
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +12 -10
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +8 -6
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +6 -4
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +7 -5
- airflow/providers/google/cloud/hooks/video_intelligence.py +5 -3
- airflow/providers/google/cloud/hooks/vision.py +5 -3
- airflow/providers/google/cloud/hooks/workflows.py +8 -6
- airflow/providers/google/cloud/links/bigquery.py +1 -1
- airflow/providers/google/cloud/links/bigquery_dts.py +1 -1
- airflow/providers/google/cloud/links/cloud_functions.py +1 -1
- airflow/providers/google/cloud/links/cloud_memorystore.py +1 -1
- airflow/providers/google/cloud/links/cloud_sql.py +1 -1
- airflow/providers/google/cloud/links/cloud_tasks.py +1 -1
- airflow/providers/google/cloud/links/compute.py +1 -1
- airflow/providers/google/cloud/links/datacatalog.py +1 -1
- airflow/providers/google/cloud/links/dataflow.py +1 -1
- airflow/providers/google/cloud/links/dataform.py +1 -1
- airflow/providers/google/cloud/links/pubsub.py +1 -1
- airflow/providers/google/cloud/links/spanner.py +1 -1
- airflow/providers/google/cloud/links/stackdriver.py +1 -1
- airflow/providers/google/cloud/links/workflows.py +2 -2
- airflow/providers/google/cloud/log/gcs_task_handler.py +5 -7
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +8 -4
- airflow/providers/google/cloud/operators/automl.py +2 -1
- airflow/providers/google/cloud/operators/bigquery.py +6 -2
- airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/operators/bigtable.py +5 -3
- airflow/providers/google/cloud/operators/cloud_batch.py +6 -3
- airflow/providers/google/cloud/operators/cloud_build.py +2 -1
- airflow/providers/google/cloud/operators/cloud_composer.py +3 -2
- airflow/providers/google/cloud/operators/cloud_memorystore.py +3 -2
- airflow/providers/google/cloud/operators/cloud_run.py +3 -2
- airflow/providers/google/cloud/operators/cloud_sql.py +157 -152
- airflow/providers/google/cloud/operators/compute.py +59 -61
- airflow/providers/google/cloud/operators/datacatalog.py +3 -2
- airflow/providers/google/cloud/operators/dataflow.py +3 -1
- airflow/providers/google/cloud/operators/dataform.py +2 -1
- airflow/providers/google/cloud/operators/datafusion.py +1 -1
- airflow/providers/google/cloud/operators/dataplex.py +110 -8
- airflow/providers/google/cloud/operators/dataproc.py +39 -18
- airflow/providers/google/cloud/operators/dataproc_metastore.py +2 -1
- airflow/providers/google/cloud/operators/dlp.py +3 -2
- airflow/providers/google/cloud/operators/functions.py +46 -46
- airflow/providers/google/cloud/operators/gcs.py +4 -6
- airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -1
- airflow/providers/google/cloud/operators/natural_language.py +3 -2
- airflow/providers/google/cloud/operators/pubsub.py +2 -1
- airflow/providers/google/cloud/operators/speech_to_text.py +3 -2
- airflow/providers/google/cloud/operators/stackdriver.py +2 -1
- airflow/providers/google/cloud/operators/tasks.py +3 -2
- airflow/providers/google/cloud/operators/text_to_speech.py +3 -2
- airflow/providers/google/cloud/operators/translate_speech.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +3 -2
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +3 -2
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +4 -4
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +3 -2
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +2 -1
- airflow/providers/google/cloud/operators/video_intelligence.py +2 -1
- airflow/providers/google/cloud/operators/vision.py +3 -2
- airflow/providers/google/cloud/operators/workflows.py +7 -5
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/sensors/dataplex.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -2
- airflow/providers/google/cloud/sensors/gcs.py +2 -1
- airflow/providers/google/cloud/sensors/workflows.py +2 -1
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +1 -4
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +1 -4
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -1
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +4 -2
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +1 -3
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +2 -1
- airflow/providers/google/cloud/transfers/presto_to_gcs.py +5 -4
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/trino_to_gcs.py +5 -4
- airflow/providers/google/cloud/triggers/bigquery.py +30 -36
- airflow/providers/google/cloud/triggers/bigquery_dts.py +9 -10
- airflow/providers/google/cloud/triggers/cloud_batch.py +6 -8
- airflow/providers/google/cloud/triggers/cloud_build.py +5 -6
- airflow/providers/google/cloud/triggers/cloud_run.py +4 -3
- airflow/providers/google/cloud/triggers/cloud_sql.py +10 -10
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +1 -1
- airflow/providers/google/cloud/triggers/dataflow.py +5 -6
- airflow/providers/google/cloud/triggers/datafusion.py +5 -6
- airflow/providers/google/cloud/triggers/dataplex.py +110 -0
- airflow/providers/google/cloud/triggers/dataproc.py +18 -20
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +14 -13
- airflow/providers/google/cloud/triggers/mlengine.py +5 -5
- airflow/providers/google/cloud/triggers/pubsub.py +2 -2
- airflow/providers/google/cloud/utils/bigquery_get_data.py +6 -3
- airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +13 -13
- airflow/providers/google/cloud/utils/mlengine_operator_utils.py +5 -3
- airflow/providers/google/cloud/utils/mlengine_prediction_summary.py +1 -1
- airflow/providers/google/common/hooks/base_google.py +10 -2
- airflow/providers/google/common/links/storage.py +1 -1
- airflow/providers/google/common/utils/id_token_credentials.py +4 -1
- airflow/providers/google/get_provider_info.py +5 -0
- airflow/providers/google/marketing_platform/hooks/campaign_manager.py +4 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +6 -3
- airflow/providers/google/suite/hooks/calendar.py +4 -2
- {apache_airflow_providers_google-10.7.0.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/METADATA +6 -6
- {apache_airflow_providers_google-10.7.0.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/RECORD +155 -173
- {apache_airflow_providers_google-10.7.0.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/WHEEL +1 -1
- airflow/providers/google/ads/_vendor/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/interceptors/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/common/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/common/types/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/enums/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/enums/types/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/errors/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/errors/types/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/resources/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/resources/types/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/services/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/services/services/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/services/services/customer_service/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/services/services/customer_service/transports/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/services/services/google_ads_service/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/services/services/google_ads_service/transports/__init__.py +0 -16
- airflow/providers/google/ads/_vendor/googleads/v12/services/types/__init__.py +0 -16
- {apache_airflow_providers_google-10.7.0.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/LICENSE +0 -0
- {apache_airflow_providers_google-10.7.0.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/NOTICE +0 -0
- {apache_airflow_providers_google-10.7.0.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/entry_points.txt +0 -0
- {apache_airflow_providers_google-10.7.0.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/top_level.txt +0 -0
@@ -22,17 +22,20 @@ from time import sleep
|
|
22
22
|
from typing import TYPE_CHECKING, Any, Sequence
|
23
23
|
|
24
24
|
from airflow import AirflowException
|
25
|
+
from airflow.providers.google.cloud.triggers.dataplex import DataplexDataQualityJobTrigger
|
25
26
|
|
26
27
|
if TYPE_CHECKING:
|
28
|
+
from google.protobuf.field_mask_pb2 import FieldMask
|
29
|
+
|
27
30
|
from airflow.utils.context import Context
|
28
31
|
|
29
32
|
from google.api_core.exceptions import AlreadyExists, GoogleAPICallError
|
30
33
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
31
34
|
from google.api_core.retry import Retry, exponential_sleep_generator
|
32
35
|
from google.cloud.dataplex_v1.types import Asset, DataScan, DataScanJob, Lake, Task, Zone
|
33
|
-
from google.protobuf.field_mask_pb2 import FieldMask
|
34
36
|
from googleapiclient.errors import HttpError
|
35
37
|
|
38
|
+
from airflow.configuration import conf
|
36
39
|
from airflow.providers.google.cloud.hooks.dataplex import AirflowDataQualityScanException, DataplexHook
|
37
40
|
from airflow.providers.google.cloud.links.dataplex import (
|
38
41
|
DataplexLakeLink,
|
@@ -894,6 +897,9 @@ class DataplexRunDataQualityScanOperator(GoogleCloudBaseOperator):
|
|
894
897
|
:param result_timeout: Value in seconds for which operator will wait for the Data Quality scan result
|
895
898
|
when the flag `asynchronous = False`.
|
896
899
|
Throws exception if there is no result found after specified amount of seconds.
|
900
|
+
:param polling_interval_seconds: time in seconds between polling for job completion.
|
901
|
+
The value is considered only when running in deferrable mode. Must be greater than 0.
|
902
|
+
:param deferrable: Run operator in the deferrable mode.
|
897
903
|
|
898
904
|
:return: Dataplex Data Quality scan job id.
|
899
905
|
"""
|
@@ -914,6 +920,8 @@ class DataplexRunDataQualityScanOperator(GoogleCloudBaseOperator):
|
|
914
920
|
asynchronous: bool = False,
|
915
921
|
fail_on_dq_failure: bool = False,
|
916
922
|
result_timeout: float = 60.0 * 10,
|
923
|
+
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
924
|
+
polling_interval_seconds: int = 10,
|
917
925
|
*args,
|
918
926
|
**kwargs,
|
919
927
|
) -> None:
|
@@ -931,6 +939,8 @@ class DataplexRunDataQualityScanOperator(GoogleCloudBaseOperator):
|
|
931
939
|
self.asynchronous = asynchronous
|
932
940
|
self.fail_on_dq_failure = fail_on_dq_failure
|
933
941
|
self.result_timeout = result_timeout
|
942
|
+
self.deferrable = deferrable
|
943
|
+
self.polling_interval_seconds = polling_interval_seconds
|
934
944
|
|
935
945
|
def execute(self, context: Context) -> str:
|
936
946
|
hook = DataplexHook(
|
@@ -948,6 +958,24 @@ class DataplexRunDataQualityScanOperator(GoogleCloudBaseOperator):
|
|
948
958
|
metadata=self.metadata,
|
949
959
|
)
|
950
960
|
job_id = result.job.name.split("/")[-1]
|
961
|
+
|
962
|
+
if self.deferrable:
|
963
|
+
if self.asynchronous:
|
964
|
+
raise AirflowException(
|
965
|
+
"Both asynchronous and deferrable parameters were passed. Please, provide only one."
|
966
|
+
)
|
967
|
+
self.defer(
|
968
|
+
trigger=DataplexDataQualityJobTrigger(
|
969
|
+
job_id=job_id,
|
970
|
+
data_scan_id=self.data_scan_id,
|
971
|
+
project_id=self.project_id,
|
972
|
+
region=self.region,
|
973
|
+
gcp_conn_id=self.gcp_conn_id,
|
974
|
+
impersonation_chain=self.impersonation_chain,
|
975
|
+
polling_interval_seconds=self.polling_interval_seconds,
|
976
|
+
),
|
977
|
+
method_name="execute_complete",
|
978
|
+
)
|
951
979
|
if not self.asynchronous:
|
952
980
|
job = hook.wait_for_data_scan_job(
|
953
981
|
job_id=job_id,
|
@@ -973,6 +1001,31 @@ class DataplexRunDataQualityScanOperator(GoogleCloudBaseOperator):
|
|
973
1001
|
|
974
1002
|
return job_id
|
975
1003
|
|
1004
|
+
def execute_complete(self, context, event=None) -> None:
|
1005
|
+
"""
|
1006
|
+
Callback for when the trigger fires - returns immediately.
|
1007
|
+
|
1008
|
+
Relies on trigger to throw an exception, otherwise it assumes execution was
|
1009
|
+
successful.
|
1010
|
+
"""
|
1011
|
+
job_state = event["job_state"]
|
1012
|
+
job_id = event["job_id"]
|
1013
|
+
if job_state == DataScanJob.State.FAILED:
|
1014
|
+
raise AirflowException(f"Job failed:\n{job_id}")
|
1015
|
+
if job_state == DataScanJob.State.CANCELLED:
|
1016
|
+
raise AirflowException(f"Job was cancelled:\n{job_id}")
|
1017
|
+
if job_state == DataScanJob.State.SUCCEEDED:
|
1018
|
+
job = event["job"]
|
1019
|
+
if not job["data_quality_result"]["passed"]:
|
1020
|
+
if self.fail_on_dq_failure:
|
1021
|
+
raise AirflowDataQualityScanException(
|
1022
|
+
f"Data Quality job {job_id} execution failed due to failure of its scanning "
|
1023
|
+
f"rules: {self.data_scan_id}"
|
1024
|
+
)
|
1025
|
+
else:
|
1026
|
+
self.log.info("Data Quality job executed successfully.")
|
1027
|
+
return job_id
|
1028
|
+
|
976
1029
|
|
977
1030
|
class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
|
978
1031
|
"""
|
@@ -1005,6 +1058,9 @@ class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
|
|
1005
1058
|
:param result_timeout: Value in seconds for which operator will wait for the Data Quality scan result
|
1006
1059
|
when the flag `wait_for_result = True`.
|
1007
1060
|
Throws exception if there is no result found after specified amount of seconds.
|
1061
|
+
:param polling_interval_seconds: time in seconds between polling for job completion.
|
1062
|
+
The value is considered only when running in deferrable mode. Must be greater than 0.
|
1063
|
+
:param deferrable: Run operator in the deferrable mode.
|
1008
1064
|
|
1009
1065
|
:return: Dict representing DataScanJob.
|
1010
1066
|
When the job completes with a successful status, information about the Data Quality result
|
@@ -1028,6 +1084,8 @@ class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
|
|
1028
1084
|
fail_on_dq_failure: bool = False,
|
1029
1085
|
wait_for_results: bool = True,
|
1030
1086
|
result_timeout: float = 60.0 * 10,
|
1087
|
+
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
1088
|
+
polling_interval_seconds: int = 10,
|
1031
1089
|
*args,
|
1032
1090
|
**kwargs,
|
1033
1091
|
) -> None:
|
@@ -1045,6 +1103,8 @@ class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
|
|
1045
1103
|
self.fail_on_dq_failure = fail_on_dq_failure
|
1046
1104
|
self.wait_for_results = wait_for_results
|
1047
1105
|
self.result_timeout = result_timeout
|
1106
|
+
self.deferrable = deferrable
|
1107
|
+
self.polling_interval_seconds = polling_interval_seconds
|
1048
1108
|
|
1049
1109
|
def execute(self, context: Context) -> dict:
|
1050
1110
|
hook = DataplexHook(
|
@@ -1069,13 +1129,27 @@ class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
|
|
1069
1129
|
self.job_id = job_id.split("/")[-1]
|
1070
1130
|
|
1071
1131
|
if self.wait_for_results:
|
1072
|
-
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1132
|
+
if self.deferrable:
|
1133
|
+
self.defer(
|
1134
|
+
trigger=DataplexDataQualityJobTrigger(
|
1135
|
+
job_id=self.job_id,
|
1136
|
+
data_scan_id=self.data_scan_id,
|
1137
|
+
project_id=self.project_id,
|
1138
|
+
region=self.region,
|
1139
|
+
gcp_conn_id=self.gcp_conn_id,
|
1140
|
+
impersonation_chain=self.impersonation_chain,
|
1141
|
+
polling_interval_seconds=self.polling_interval_seconds,
|
1142
|
+
),
|
1143
|
+
method_name="execute_complete",
|
1144
|
+
)
|
1145
|
+
else:
|
1146
|
+
job = hook.wait_for_data_scan_job(
|
1147
|
+
job_id=self.job_id,
|
1148
|
+
data_scan_id=self.data_scan_id,
|
1149
|
+
project_id=self.project_id,
|
1150
|
+
region=self.region,
|
1151
|
+
result_timeout=self.result_timeout,
|
1152
|
+
)
|
1079
1153
|
else:
|
1080
1154
|
job = hook.get_data_scan_job(
|
1081
1155
|
project_id=self.project_id,
|
@@ -1104,6 +1178,34 @@ class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
|
|
1104
1178
|
|
1105
1179
|
return result
|
1106
1180
|
|
1181
|
+
def execute_complete(self, context, event=None) -> None:
|
1182
|
+
"""
|
1183
|
+
Callback for when the trigger fires - returns immediately.
|
1184
|
+
|
1185
|
+
Relies on trigger to throw an exception, otherwise it assumes execution was
|
1186
|
+
successful.
|
1187
|
+
"""
|
1188
|
+
job_state = event["job_state"]
|
1189
|
+
job_id = event["job_id"]
|
1190
|
+
job = event["job"]
|
1191
|
+
if job_state == DataScanJob.State.FAILED:
|
1192
|
+
raise AirflowException(f"Job failed:\n{job_id}")
|
1193
|
+
if job_state == DataScanJob.State.CANCELLED:
|
1194
|
+
raise AirflowException(f"Job was cancelled:\n{job_id}")
|
1195
|
+
if job_state == DataScanJob.State.SUCCEEDED:
|
1196
|
+
if not job["data_quality_result"]["passed"]:
|
1197
|
+
if self.fail_on_dq_failure:
|
1198
|
+
raise AirflowDataQualityScanException(
|
1199
|
+
f"Data Quality job {self.job_id} execution failed due to failure of its scanning "
|
1200
|
+
f"rules: {self.data_scan_id}"
|
1201
|
+
)
|
1202
|
+
else:
|
1203
|
+
self.log.info("Data Quality job executed successfully")
|
1204
|
+
else:
|
1205
|
+
self.log.info("Data Quality job execution returned status: %s", job_state)
|
1206
|
+
|
1207
|
+
return job
|
1208
|
+
|
1107
1209
|
|
1108
1210
|
class DataplexCreateZoneOperator(GoogleCloudBaseOperator):
|
1109
1211
|
"""
|
@@ -29,13 +29,10 @@ from datetime import datetime, timedelta
|
|
29
29
|
from enum import Enum
|
30
30
|
from typing import TYPE_CHECKING, Any, Sequence
|
31
31
|
|
32
|
-
from google.api_core import operation # type: ignore
|
33
32
|
from google.api_core.exceptions import AlreadyExists, NotFound
|
34
33
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
35
34
|
from google.api_core.retry import Retry, exponential_sleep_generator
|
36
35
|
from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
|
37
|
-
from google.protobuf.duration_pb2 import Duration
|
38
|
-
from google.protobuf.field_mask_pb2 import FieldMask
|
39
36
|
|
40
37
|
from airflow.configuration import conf
|
41
38
|
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
|
@@ -64,6 +61,10 @@ from airflow.providers.google.cloud.triggers.dataproc import (
|
|
64
61
|
from airflow.utils import timezone
|
65
62
|
|
66
63
|
if TYPE_CHECKING:
|
64
|
+
from google.api_core import operation
|
65
|
+
from google.protobuf.duration_pb2 import Duration
|
66
|
+
from google.protobuf.field_mask_pb2 import FieldMask
|
67
|
+
|
67
68
|
from airflow.utils.context import Context
|
68
69
|
|
69
70
|
|
@@ -244,12 +245,13 @@ class ClusterGenerator:
|
|
244
245
|
return PreemptibilityType(preemptibility.upper())
|
245
246
|
|
246
247
|
def _get_init_action_timeout(self) -> dict:
|
247
|
-
match = re.
|
248
|
+
match = re.fullmatch(r"(\d+)([sm])", self.init_action_timeout)
|
248
249
|
if match:
|
249
|
-
val =
|
250
|
-
|
251
|
-
|
252
|
-
|
250
|
+
val = int(match.group(1))
|
251
|
+
unit = match.group(2)
|
252
|
+
if unit == "s":
|
253
|
+
return {"seconds": val}
|
254
|
+
elif unit == "m":
|
253
255
|
return {"seconds": int(timedelta(minutes=val).total_seconds())}
|
254
256
|
|
255
257
|
raise AirflowException(
|
@@ -594,13 +596,17 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
|
|
594
596
|
if cluster.status.state != cluster.status.State.ERROR:
|
595
597
|
return
|
596
598
|
self.log.info("Cluster is in ERROR state")
|
599
|
+
self.log.info("Gathering diagnostic information.")
|
597
600
|
gcs_uri = hook.diagnose_cluster(
|
598
601
|
region=self.region, cluster_name=self.cluster_name, project_id=self.project_id
|
599
602
|
)
|
600
603
|
self.log.info("Diagnostic information for cluster %s available at: %s", self.cluster_name, gcs_uri)
|
601
604
|
if self.delete_on_error:
|
602
605
|
self._delete_cluster(hook)
|
603
|
-
|
606
|
+
# The delete op is asynchronous and can cause further failure if the cluster finishes
|
607
|
+
# deleting between catching AlreadyExists and checking state
|
608
|
+
self._wait_for_cluster_in_deleting_state(hook)
|
609
|
+
raise AirflowException("Cluster was created in an ERROR state then deleted.")
|
604
610
|
raise AirflowException("Cluster was created but is in ERROR state")
|
605
611
|
|
606
612
|
def _wait_for_cluster_in_deleting_state(self, hook: DataprocHook) -> None:
|
@@ -667,6 +673,22 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
|
|
667
673
|
raise
|
668
674
|
self.log.info("Cluster already exists.")
|
669
675
|
cluster = self._get_cluster(hook)
|
676
|
+
except AirflowException as ae:
|
677
|
+
# There still could be a cluster created here in an ERROR state which
|
678
|
+
# should be deleted immediately rather than consuming another retry attempt
|
679
|
+
# (assuming delete_on_error is true (default))
|
680
|
+
# This reduces overall the number of task attempts from 3 to 2 to successful cluster creation
|
681
|
+
# assuming the underlying GCE issues have resolved within that window. Users can configure
|
682
|
+
# a higher number of retry attempts in powers of two with 30s-60s wait interval
|
683
|
+
try:
|
684
|
+
cluster = self._get_cluster(hook)
|
685
|
+
self._handle_error_state(hook, cluster)
|
686
|
+
except AirflowException as ae_inner:
|
687
|
+
# We could get any number of failures here, including cluster not found and we
|
688
|
+
# can just ignore to ensure we surface the original cluster create failure
|
689
|
+
self.log.error(ae_inner, exc_info=True)
|
690
|
+
finally:
|
691
|
+
raise ae
|
670
692
|
|
671
693
|
# Check if cluster is not in ERROR state
|
672
694
|
self._handle_error_state(hook, cluster)
|
@@ -790,18 +812,17 @@ class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
|
|
790
812
|
return None
|
791
813
|
|
792
814
|
timeout = None
|
793
|
-
match = re.
|
815
|
+
match = re.fullmatch(r"(\d+)([smdh])", self.graceful_decommission_timeout)
|
794
816
|
if match:
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
817
|
+
val = int(match.group(1))
|
818
|
+
unit = match.group(2)
|
819
|
+
if unit == "s":
|
820
|
+
timeout = val
|
821
|
+
elif unit == "m":
|
799
822
|
timeout = int(timedelta(minutes=val).total_seconds())
|
800
|
-
elif
|
801
|
-
val = float(match.group(1))
|
823
|
+
elif unit == "h":
|
802
824
|
timeout = int(timedelta(hours=val).total_seconds())
|
803
|
-
elif
|
804
|
-
val = float(match.group(1))
|
825
|
+
elif unit == "d":
|
805
826
|
timeout = int(timedelta(days=val).total_seconds())
|
806
827
|
|
807
828
|
if not timeout:
|
@@ -26,7 +26,6 @@ from google.api_core.retry import Retry, exponential_sleep_generator
|
|
26
26
|
from google.cloud.metastore_v1 import MetadataExport, MetadataManagementActivity
|
27
27
|
from google.cloud.metastore_v1.types import Backup, MetadataImport, Service
|
28
28
|
from google.cloud.metastore_v1.types.metastore import DatabaseDumpSpec, Restore
|
29
|
-
from google.protobuf.field_mask_pb2 import FieldMask
|
30
29
|
from googleapiclient.errors import HttpError
|
31
30
|
|
32
31
|
from airflow import AirflowException
|
@@ -37,6 +36,8 @@ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseO
|
|
37
36
|
from airflow.providers.google.common.links.storage import StorageLink
|
38
37
|
|
39
38
|
if TYPE_CHECKING:
|
39
|
+
from google.protobuf.field_mask_pb2 import FieldMask
|
40
|
+
|
40
41
|
from airflow.models.taskinstancekey import TaskInstanceKey
|
41
42
|
from airflow.utils.context import Context
|
42
43
|
|
@@ -23,7 +23,6 @@ from typing import TYPE_CHECKING, Sequence
|
|
23
23
|
|
24
24
|
from google.api_core.exceptions import AlreadyExists, InvalidArgument, NotFound
|
25
25
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
26
|
-
from google.api_core.retry import Retry
|
27
26
|
from google.cloud.dlp_v2.types import (
|
28
27
|
ByteContentItem,
|
29
28
|
ContentItem,
|
@@ -44,7 +43,6 @@ from google.cloud.dlp_v2.types import (
|
|
44
43
|
StoredInfoType,
|
45
44
|
StoredInfoTypeConfig,
|
46
45
|
)
|
47
|
-
from google.protobuf.field_mask_pb2 import FieldMask
|
48
46
|
|
49
47
|
from airflow.providers.google.cloud.hooks.dlp import CloudDLPHook
|
50
48
|
from airflow.providers.google.cloud.links.data_loss_prevention import (
|
@@ -63,6 +61,9 @@ from airflow.providers.google.cloud.links.data_loss_prevention import (
|
|
63
61
|
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
64
62
|
|
65
63
|
if TYPE_CHECKING:
|
64
|
+
from google.api_core.retry import Retry
|
65
|
+
from google.protobuf.field_mask_pb2 import FieldMask
|
66
|
+
|
66
67
|
from airflow.utils.context import Context
|
67
68
|
|
68
69
|
|
@@ -51,54 +51,54 @@ def _validate_max_instances(value):
|
|
51
51
|
|
52
52
|
|
53
53
|
CLOUD_FUNCTION_VALIDATION: list[dict[str, Any]] = [
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
name
|
66
|
-
type
|
67
|
-
fields
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
54
|
+
{"name": "name", "regexp": "^.+$"},
|
55
|
+
{"name": "description", "regexp": "^.+$", "optional": True},
|
56
|
+
{"name": "entryPoint", "regexp": r"^.+$", "optional": True},
|
57
|
+
{"name": "runtime", "regexp": r"^.+$", "optional": True},
|
58
|
+
{"name": "timeout", "regexp": r"^.+$", "optional": True},
|
59
|
+
{"name": "availableMemoryMb", "custom_validation": _validate_available_memory_in_mb, "optional": True},
|
60
|
+
{"name": "labels", "optional": True},
|
61
|
+
{"name": "environmentVariables", "optional": True},
|
62
|
+
{"name": "network", "regexp": r"^.+$", "optional": True},
|
63
|
+
{"name": "maxInstances", "optional": True, "custom_validation": _validate_max_instances},
|
64
|
+
{
|
65
|
+
"name": "source_code",
|
66
|
+
"type": "union",
|
67
|
+
"fields": [
|
68
|
+
{"name": "sourceArchiveUrl", "regexp": r"^.+$"},
|
69
|
+
{"name": "sourceRepositoryUrl", "regexp": r"^.+$", "api_version": "v1beta2"},
|
70
|
+
{"name": "sourceRepository", "type": "dict", "fields": [{"name": "url", "regexp": r"^.+$"}]},
|
71
|
+
{"name": "sourceUploadUrl"},
|
72
72
|
],
|
73
|
-
|
74
|
-
|
75
|
-
name
|
76
|
-
type
|
77
|
-
fields
|
78
|
-
|
79
|
-
name
|
80
|
-
type
|
81
|
-
fields
|
73
|
+
},
|
74
|
+
{
|
75
|
+
"name": "trigger",
|
76
|
+
"type": "union",
|
77
|
+
"fields": [
|
78
|
+
{
|
79
|
+
"name": "httpsTrigger",
|
80
|
+
"type": "dict",
|
81
|
+
"fields": [
|
82
82
|
# This dict should be empty at input (url is added at output)
|
83
83
|
],
|
84
|
-
|
85
|
-
|
86
|
-
name
|
87
|
-
type
|
88
|
-
fields
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
name
|
94
|
-
type
|
95
|
-
optional
|
96
|
-
fields
|
97
|
-
|
84
|
+
},
|
85
|
+
{
|
86
|
+
"name": "eventTrigger",
|
87
|
+
"type": "dict",
|
88
|
+
"fields": [
|
89
|
+
{"name": "eventType", "regexp": r"^.+$"},
|
90
|
+
{"name": "resource", "regexp": r"^.+$"},
|
91
|
+
{"name": "service", "regexp": r"^.+$", "optional": True},
|
92
|
+
{
|
93
|
+
"name": "failurePolicy",
|
94
|
+
"type": "dict",
|
95
|
+
"optional": True,
|
96
|
+
"fields": [{"name": "retry", "type": "dict", "optional": True}],
|
97
|
+
},
|
98
98
|
],
|
99
|
-
|
99
|
+
},
|
100
100
|
],
|
101
|
-
|
101
|
+
},
|
102
102
|
]
|
103
103
|
|
104
104
|
|
@@ -282,9 +282,9 @@ class ZipPathPreprocessor:
|
|
282
282
|
if self._is_present_and_empty(self.body, GCF_SOURCE_UPLOAD_URL):
|
283
283
|
if not self.zip_path:
|
284
284
|
raise AirflowException(
|
285
|
-
"Parameter '{
|
286
|
-
"is missing or empty. You need to have non empty '{
|
287
|
-
"when '{
|
285
|
+
f"Parameter '{GCF_SOURCE_UPLOAD_URL}' is empty in the body and argument '{GCF_ZIP_PATH}' "
|
286
|
+
f"is missing or empty. You need to have non empty '{GCF_ZIP_PATH}' "
|
287
|
+
f"when '{GCF_SOURCE_UPLOAD_URL}' is present and empty."
|
288
288
|
)
|
289
289
|
|
290
290
|
def _verify_upload_url_and_zip_path(self) -> None:
|
@@ -797,9 +797,8 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
|
|
797
797
|
num_max_attempts=self.download_num_attempts,
|
798
798
|
)
|
799
799
|
except GoogleCloudError:
|
800
|
-
if self.download_continue_on_fail:
|
801
|
-
|
802
|
-
raise
|
800
|
+
if not self.download_continue_on_fail:
|
801
|
+
raise
|
803
802
|
|
804
803
|
self.log.info("Starting the transformation")
|
805
804
|
cmd = [self.transform_script] if isinstance(self.transform_script, str) else self.transform_script
|
@@ -847,9 +846,8 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
|
|
847
846
|
)
|
848
847
|
files_uploaded.append(str(upload_file_name))
|
849
848
|
except GoogleCloudError:
|
850
|
-
if self.upload_continue_on_fail:
|
851
|
-
|
852
|
-
raise
|
849
|
+
if not self.upload_continue_on_fail:
|
850
|
+
raise
|
853
851
|
|
854
852
|
return files_uploaded
|
855
853
|
|
@@ -24,7 +24,6 @@ from typing import TYPE_CHECKING, Any, Sequence
|
|
24
24
|
|
25
25
|
from google.api_core.exceptions import AlreadyExists
|
26
26
|
from google.cloud.container_v1.types import Cluster
|
27
|
-
from kubernetes.client.models import V1Pod
|
28
27
|
|
29
28
|
from airflow.configuration import conf
|
30
29
|
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
|
@@ -46,6 +45,8 @@ from airflow.providers.google.cloud.triggers.kubernetes_engine import GKEOperati
|
|
46
45
|
from airflow.utils.timezone import utcnow
|
47
46
|
|
48
47
|
if TYPE_CHECKING:
|
48
|
+
from kubernetes.client.models import V1Pod
|
49
|
+
|
49
50
|
from airflow.utils.context import Context
|
50
51
|
|
51
52
|
KUBE_CONFIG_ENV_VAR = "KUBECONFIG"
|
@@ -21,14 +21,15 @@ from __future__ import annotations
|
|
21
21
|
from typing import TYPE_CHECKING, Sequence, Tuple
|
22
22
|
|
23
23
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
24
|
-
from google.api_core.retry import Retry
|
25
|
-
from google.cloud.language_v1.types import Document, EncodingType
|
26
24
|
from google.protobuf.json_format import MessageToDict
|
27
25
|
|
28
26
|
from airflow.providers.google.cloud.hooks.natural_language import CloudNaturalLanguageHook
|
29
27
|
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
30
28
|
|
31
29
|
if TYPE_CHECKING:
|
30
|
+
from google.api_core.retry import Retry
|
31
|
+
from google.cloud.language_v1.types import Document, EncodingType
|
32
|
+
|
32
33
|
from airflow.utils.context import Context
|
33
34
|
|
34
35
|
|
@@ -27,7 +27,6 @@ from __future__ import annotations
|
|
27
27
|
from typing import TYPE_CHECKING, Any, Callable, Sequence
|
28
28
|
|
29
29
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
30
|
-
from google.api_core.retry import Retry
|
31
30
|
from google.cloud.pubsub_v1.types import (
|
32
31
|
DeadLetterPolicy,
|
33
32
|
Duration,
|
@@ -43,6 +42,8 @@ from airflow.providers.google.cloud.links.pubsub import PubSubSubscriptionLink,
|
|
43
42
|
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
44
43
|
|
45
44
|
if TYPE_CHECKING:
|
45
|
+
from google.api_core.retry import Retry
|
46
|
+
|
46
47
|
from airflow.utils.context import Context
|
47
48
|
|
48
49
|
|
@@ -21,8 +21,6 @@ from __future__ import annotations
|
|
21
21
|
from typing import TYPE_CHECKING, Sequence
|
22
22
|
|
23
23
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
24
|
-
from google.api_core.retry import Retry
|
25
|
-
from google.cloud.speech_v1.types import RecognitionConfig
|
26
24
|
from google.protobuf.json_format import MessageToDict
|
27
25
|
|
28
26
|
from airflow.exceptions import AirflowException
|
@@ -31,6 +29,9 @@ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseO
|
|
31
29
|
from airflow.providers.google.common.links.storage import FileDetailsLink
|
32
30
|
|
33
31
|
if TYPE_CHECKING:
|
32
|
+
from google.api_core.retry import Retry
|
33
|
+
from google.cloud.speech_v1.types import RecognitionConfig
|
34
|
+
|
34
35
|
from airflow.utils.context import Context
|
35
36
|
|
36
37
|
|
@@ -20,7 +20,6 @@ from __future__ import annotations
|
|
20
20
|
from typing import TYPE_CHECKING, Sequence
|
21
21
|
|
22
22
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
23
|
-
from google.api_core.retry import Retry
|
24
23
|
from google.cloud.monitoring_v3 import AlertPolicy, NotificationChannel
|
25
24
|
|
26
25
|
from airflow.providers.google.cloud.hooks.stackdriver import StackdriverHook
|
@@ -31,6 +30,8 @@ from airflow.providers.google.cloud.links.stackdriver import (
|
|
31
30
|
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
32
31
|
|
33
32
|
if TYPE_CHECKING:
|
33
|
+
from google.api_core.retry import Retry
|
34
|
+
|
34
35
|
from airflow.utils.context import Context
|
35
36
|
|
36
37
|
|
@@ -23,15 +23,16 @@ from typing import TYPE_CHECKING, Sequence, Tuple
|
|
23
23
|
|
24
24
|
from google.api_core.exceptions import AlreadyExists
|
25
25
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
26
|
-
from google.api_core.retry import Retry
|
27
26
|
from google.cloud.tasks_v2.types import Queue, Task
|
28
|
-
from google.protobuf.field_mask_pb2 import FieldMask
|
29
27
|
|
30
28
|
from airflow.providers.google.cloud.hooks.tasks import CloudTasksHook
|
31
29
|
from airflow.providers.google.cloud.links.cloud_tasks import CloudTasksLink, CloudTasksQueueLink
|
32
30
|
from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
|
33
31
|
|
34
32
|
if TYPE_CHECKING:
|
33
|
+
from google.api_core.retry import Retry
|
34
|
+
from google.protobuf.field_mask_pb2 import FieldMask
|
35
|
+
|
35
36
|
from airflow.utils.context import Context
|
36
37
|
|
37
38
|
|
@@ -22,8 +22,6 @@ from tempfile import NamedTemporaryFile
|
|
22
22
|
from typing import TYPE_CHECKING, Sequence
|
23
23
|
|
24
24
|
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
25
|
-
from google.api_core.retry import Retry
|
26
|
-
from google.cloud.texttospeech_v1.types import AudioConfig, SynthesisInput, VoiceSelectionParams
|
27
25
|
|
28
26
|
from airflow.exceptions import AirflowException
|
29
27
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook
|
@@ -32,6 +30,9 @@ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseO
|
|
32
30
|
from airflow.providers.google.common.links.storage import FileDetailsLink
|
33
31
|
|
34
32
|
if TYPE_CHECKING:
|
33
|
+
from google.api_core.retry import Retry
|
34
|
+
from google.cloud.texttospeech_v1.types import AudioConfig, SynthesisInput, VoiceSelectionParams
|
35
|
+
|
35
36
|
from airflow.utils.context import Context
|
36
37
|
|
37
38
|
|
@@ -20,7 +20,6 @@ from __future__ import annotations
|
|
20
20
|
|
21
21
|
from typing import TYPE_CHECKING, Sequence
|
22
22
|
|
23
|
-
from google.cloud.speech_v1.types import RecognitionAudio, RecognitionConfig
|
24
23
|
from google.protobuf.json_format import MessageToDict
|
25
24
|
|
26
25
|
from airflow.exceptions import AirflowException
|
@@ -30,6 +29,8 @@ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseO
|
|
30
29
|
from airflow.providers.google.common.links.storage import FileDetailsLink
|
31
30
|
|
32
31
|
if TYPE_CHECKING:
|
32
|
+
from google.cloud.speech_v1.types import RecognitionAudio, RecognitionConfig
|
33
|
+
|
33
34
|
from airflow.utils.context import Context
|
34
35
|
|
35
36
|
|