apache-airflow-providers-google 10.7.0rc1__py3-none-any.whl → 10.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +10 -6
  3. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +4 -1
  4. airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +31 -34
  5. airflow/providers/google/cloud/hooks/automl.py +11 -9
  6. airflow/providers/google/cloud/hooks/bigquery.py +30 -36
  7. airflow/providers/google/cloud/hooks/bigquery_dts.py +5 -3
  8. airflow/providers/google/cloud/hooks/bigtable.py +11 -8
  9. airflow/providers/google/cloud/hooks/cloud_batch.py +5 -3
  10. airflow/providers/google/cloud/hooks/cloud_build.py +6 -4
  11. airflow/providers/google/cloud/hooks/cloud_composer.py +14 -10
  12. airflow/providers/google/cloud/hooks/cloud_memorystore.py +5 -3
  13. airflow/providers/google/cloud/hooks/cloud_run.py +5 -3
  14. airflow/providers/google/cloud/hooks/cloud_sql.py +11 -14
  15. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +8 -6
  16. airflow/providers/google/cloud/hooks/compute.py +5 -3
  17. airflow/providers/google/cloud/hooks/compute_ssh.py +1 -1
  18. airflow/providers/google/cloud/hooks/datacatalog.py +5 -3
  19. airflow/providers/google/cloud/hooks/dataflow.py +8 -11
  20. airflow/providers/google/cloud/hooks/dataform.py +4 -2
  21. airflow/providers/google/cloud/hooks/datafusion.py +24 -6
  22. airflow/providers/google/cloud/hooks/dataplex.py +75 -6
  23. airflow/providers/google/cloud/hooks/dataproc.py +9 -7
  24. airflow/providers/google/cloud/hooks/dataproc_metastore.py +8 -6
  25. airflow/providers/google/cloud/hooks/dlp.py +139 -137
  26. airflow/providers/google/cloud/hooks/gcs.py +15 -20
  27. airflow/providers/google/cloud/hooks/kms.py +4 -2
  28. airflow/providers/google/cloud/hooks/kubernetes_engine.py +34 -34
  29. airflow/providers/google/cloud/hooks/looker.py +4 -1
  30. airflow/providers/google/cloud/hooks/mlengine.py +8 -6
  31. airflow/providers/google/cloud/hooks/natural_language.py +4 -2
  32. airflow/providers/google/cloud/hooks/os_login.py +9 -7
  33. airflow/providers/google/cloud/hooks/pubsub.py +13 -11
  34. airflow/providers/google/cloud/hooks/spanner.py +7 -5
  35. airflow/providers/google/cloud/hooks/speech_to_text.py +4 -2
  36. airflow/providers/google/cloud/hooks/stackdriver.py +6 -5
  37. airflow/providers/google/cloud/hooks/tasks.py +5 -3
  38. airflow/providers/google/cloud/hooks/text_to_speech.py +4 -2
  39. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +7 -5
  40. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +6 -4
  41. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +11 -9
  42. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +12 -10
  43. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +8 -6
  44. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +6 -4
  45. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +7 -5
  46. airflow/providers/google/cloud/hooks/video_intelligence.py +5 -3
  47. airflow/providers/google/cloud/hooks/vision.py +5 -3
  48. airflow/providers/google/cloud/hooks/workflows.py +8 -6
  49. airflow/providers/google/cloud/links/bigquery.py +1 -1
  50. airflow/providers/google/cloud/links/bigquery_dts.py +1 -1
  51. airflow/providers/google/cloud/links/cloud_functions.py +1 -1
  52. airflow/providers/google/cloud/links/cloud_memorystore.py +1 -1
  53. airflow/providers/google/cloud/links/cloud_sql.py +1 -1
  54. airflow/providers/google/cloud/links/cloud_tasks.py +1 -1
  55. airflow/providers/google/cloud/links/compute.py +1 -1
  56. airflow/providers/google/cloud/links/datacatalog.py +1 -1
  57. airflow/providers/google/cloud/links/dataflow.py +1 -1
  58. airflow/providers/google/cloud/links/dataform.py +1 -1
  59. airflow/providers/google/cloud/links/pubsub.py +1 -1
  60. airflow/providers/google/cloud/links/spanner.py +1 -1
  61. airflow/providers/google/cloud/links/stackdriver.py +1 -1
  62. airflow/providers/google/cloud/links/workflows.py +2 -2
  63. airflow/providers/google/cloud/log/gcs_task_handler.py +5 -7
  64. airflow/providers/google/cloud/log/stackdriver_task_handler.py +8 -4
  65. airflow/providers/google/cloud/operators/automl.py +2 -1
  66. airflow/providers/google/cloud/operators/bigquery.py +6 -2
  67. airflow/providers/google/cloud/operators/bigquery_dts.py +2 -1
  68. airflow/providers/google/cloud/operators/bigtable.py +5 -3
  69. airflow/providers/google/cloud/operators/cloud_batch.py +6 -3
  70. airflow/providers/google/cloud/operators/cloud_build.py +2 -1
  71. airflow/providers/google/cloud/operators/cloud_composer.py +3 -2
  72. airflow/providers/google/cloud/operators/cloud_memorystore.py +3 -2
  73. airflow/providers/google/cloud/operators/cloud_run.py +3 -2
  74. airflow/providers/google/cloud/operators/cloud_sql.py +157 -152
  75. airflow/providers/google/cloud/operators/compute.py +59 -61
  76. airflow/providers/google/cloud/operators/datacatalog.py +3 -2
  77. airflow/providers/google/cloud/operators/dataflow.py +3 -1
  78. airflow/providers/google/cloud/operators/dataform.py +2 -1
  79. airflow/providers/google/cloud/operators/datafusion.py +1 -1
  80. airflow/providers/google/cloud/operators/dataplex.py +110 -8
  81. airflow/providers/google/cloud/operators/dataproc.py +39 -18
  82. airflow/providers/google/cloud/operators/dataproc_metastore.py +2 -1
  83. airflow/providers/google/cloud/operators/dlp.py +3 -2
  84. airflow/providers/google/cloud/operators/functions.py +46 -46
  85. airflow/providers/google/cloud/operators/gcs.py +4 -6
  86. airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -1
  87. airflow/providers/google/cloud/operators/natural_language.py +3 -2
  88. airflow/providers/google/cloud/operators/pubsub.py +2 -1
  89. airflow/providers/google/cloud/operators/speech_to_text.py +3 -2
  90. airflow/providers/google/cloud/operators/stackdriver.py +2 -1
  91. airflow/providers/google/cloud/operators/tasks.py +3 -2
  92. airflow/providers/google/cloud/operators/text_to_speech.py +3 -2
  93. airflow/providers/google/cloud/operators/translate_speech.py +2 -1
  94. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +2 -1
  95. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +3 -2
  96. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +2 -1
  97. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +3 -2
  98. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +4 -4
  99. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +3 -2
  100. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +2 -1
  101. airflow/providers/google/cloud/operators/video_intelligence.py +2 -1
  102. airflow/providers/google/cloud/operators/vision.py +3 -2
  103. airflow/providers/google/cloud/operators/workflows.py +7 -5
  104. airflow/providers/google/cloud/secrets/secret_manager.py +2 -2
  105. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
  106. airflow/providers/google/cloud/sensors/dataplex.py +2 -1
  107. airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -2
  108. airflow/providers/google/cloud/sensors/gcs.py +2 -1
  109. airflow/providers/google/cloud/sensors/workflows.py +2 -1
  110. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +24 -10
  111. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  112. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +2 -1
  113. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +1 -4
  114. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +1 -4
  115. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -1
  116. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +4 -2
  117. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +1 -3
  118. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +2 -2
  119. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +2 -1
  120. airflow/providers/google/cloud/transfers/presto_to_gcs.py +5 -4
  121. airflow/providers/google/cloud/transfers/sql_to_gcs.py +1 -1
  122. airflow/providers/google/cloud/transfers/trino_to_gcs.py +5 -4
  123. airflow/providers/google/cloud/triggers/bigquery.py +30 -36
  124. airflow/providers/google/cloud/triggers/bigquery_dts.py +9 -10
  125. airflow/providers/google/cloud/triggers/cloud_batch.py +6 -8
  126. airflow/providers/google/cloud/triggers/cloud_build.py +5 -6
  127. airflow/providers/google/cloud/triggers/cloud_run.py +4 -3
  128. airflow/providers/google/cloud/triggers/cloud_sql.py +10 -10
  129. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +1 -1
  130. airflow/providers/google/cloud/triggers/dataflow.py +5 -6
  131. airflow/providers/google/cloud/triggers/datafusion.py +5 -6
  132. airflow/providers/google/cloud/triggers/dataplex.py +110 -0
  133. airflow/providers/google/cloud/triggers/dataproc.py +18 -20
  134. airflow/providers/google/cloud/triggers/kubernetes_engine.py +14 -13
  135. airflow/providers/google/cloud/triggers/mlengine.py +5 -5
  136. airflow/providers/google/cloud/triggers/pubsub.py +2 -2
  137. airflow/providers/google/cloud/utils/bigquery_get_data.py +6 -3
  138. airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
  139. airflow/providers/google/cloud/utils/field_validator.py +13 -13
  140. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +5 -3
  141. airflow/providers/google/cloud/utils/mlengine_prediction_summary.py +1 -1
  142. airflow/providers/google/common/hooks/base_google.py +10 -2
  143. airflow/providers/google/common/links/storage.py +1 -1
  144. airflow/providers/google/common/utils/id_token_credentials.py +4 -1
  145. airflow/providers/google/get_provider_info.py +5 -0
  146. airflow/providers/google/marketing_platform/hooks/campaign_manager.py +4 -2
  147. airflow/providers/google/marketing_platform/sensors/display_video.py +6 -3
  148. airflow/providers/google/suite/hooks/calendar.py +4 -2
  149. {apache_airflow_providers_google-10.7.0rc1.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/METADATA +8 -8
  150. {apache_airflow_providers_google-10.7.0rc1.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/RECORD +155 -173
  151. {apache_airflow_providers_google-10.7.0rc1.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/WHEEL +1 -1
  152. airflow/providers/google/ads/_vendor/__init__.py +0 -16
  153. airflow/providers/google/ads/_vendor/googleads/__init__.py +0 -16
  154. airflow/providers/google/ads/_vendor/googleads/interceptors/__init__.py +0 -16
  155. airflow/providers/google/ads/_vendor/googleads/v12/__init__.py +0 -16
  156. airflow/providers/google/ads/_vendor/googleads/v12/common/__init__.py +0 -16
  157. airflow/providers/google/ads/_vendor/googleads/v12/common/types/__init__.py +0 -16
  158. airflow/providers/google/ads/_vendor/googleads/v12/enums/__init__.py +0 -16
  159. airflow/providers/google/ads/_vendor/googleads/v12/enums/types/__init__.py +0 -16
  160. airflow/providers/google/ads/_vendor/googleads/v12/errors/__init__.py +0 -16
  161. airflow/providers/google/ads/_vendor/googleads/v12/errors/types/__init__.py +0 -16
  162. airflow/providers/google/ads/_vendor/googleads/v12/resources/__init__.py +0 -16
  163. airflow/providers/google/ads/_vendor/googleads/v12/resources/types/__init__.py +0 -16
  164. airflow/providers/google/ads/_vendor/googleads/v12/services/__init__.py +0 -16
  165. airflow/providers/google/ads/_vendor/googleads/v12/services/services/__init__.py +0 -16
  166. airflow/providers/google/ads/_vendor/googleads/v12/services/services/customer_service/__init__.py +0 -16
  167. airflow/providers/google/ads/_vendor/googleads/v12/services/services/customer_service/transports/__init__.py +0 -16
  168. airflow/providers/google/ads/_vendor/googleads/v12/services/services/google_ads_service/__init__.py +0 -16
  169. airflow/providers/google/ads/_vendor/googleads/v12/services/services/google_ads_service/transports/__init__.py +0 -16
  170. airflow/providers/google/ads/_vendor/googleads/v12/services/types/__init__.py +0 -16
  171. {apache_airflow_providers_google-10.7.0rc1.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/LICENSE +0 -0
  172. {apache_airflow_providers_google-10.7.0rc1.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/NOTICE +0 -0
  173. {apache_airflow_providers_google-10.7.0rc1.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/entry_points.txt +0 -0
  174. {apache_airflow_providers_google-10.7.0rc1.dist-info → apache_airflow_providers_google-10.8.0.dist-info}/top_level.txt +0 -0
@@ -22,17 +22,20 @@ from time import sleep
 from typing import TYPE_CHECKING, Any, Sequence
 
 from airflow import AirflowException
+from airflow.providers.google.cloud.triggers.dataplex import DataplexDataQualityJobTrigger
 
 if TYPE_CHECKING:
+    from google.protobuf.field_mask_pb2 import FieldMask
+
     from airflow.utils.context import Context
 
 from google.api_core.exceptions import AlreadyExists, GoogleAPICallError
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
 from google.api_core.retry import Retry, exponential_sleep_generator
 from google.cloud.dataplex_v1.types import Asset, DataScan, DataScanJob, Lake, Task, Zone
-from google.protobuf.field_mask_pb2 import FieldMask
 from googleapiclient.errors import HttpError
 
+from airflow.configuration import conf
 from airflow.providers.google.cloud.hooks.dataplex import AirflowDataQualityScanException, DataplexHook
 from airflow.providers.google.cloud.links.dataplex import (
     DataplexLakeLink,
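Most of the import churn in this release follows one pattern: imports needed only for type annotations move under the if TYPE_CHECKING: guard, so type checkers still resolve them while the interpreter skips them at runtime. A minimal sketch of the pattern (the function below is illustrative, not part of the provider):

    # TYPE_CHECKING is False at runtime, so the guarded import never executes;
    # with postponed evaluation of annotations the name is still usable in hints.
    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from google.protobuf.field_mask_pb2 import FieldMask

    def mask_paths(update_mask: FieldMask | None) -> list[str]:
        # The annotation stays an unevaluated string at runtime.
        return list(update_mask.paths) if update_mask else []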
@@ -894,6 +897,9 @@ class DataplexRunDataQualityScanOperator(GoogleCloudBaseOperator):
     :param result_timeout: Value in seconds for which operator will wait for the Data Quality scan result
         when the flag `asynchronous = False`.
         Throws exception if there is no result found after specified amount of seconds.
+    :param polling_interval_seconds: time in seconds between polling for job completion.
+        The value is considered only when running in deferrable mode. Must be greater than 0.
+    :param deferrable: Run operator in the deferrable mode.
 
     :return: Dataplex Data Quality scan job id.
     """
@@ -914,6 +920,8 @@ class DataplexRunDataQualityScanOperator(GoogleCloudBaseOperator):
         asynchronous: bool = False,
         fail_on_dq_failure: bool = False,
         result_timeout: float = 60.0 * 10,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        polling_interval_seconds: int = 10,
         *args,
         **kwargs,
     ) -> None:
@@ -931,6 +939,8 @@ class DataplexRunDataQualityScanOperator(GoogleCloudBaseOperator):
         self.asynchronous = asynchronous
         self.fail_on_dq_failure = fail_on_dq_failure
         self.result_timeout = result_timeout
+        self.deferrable = deferrable
+        self.polling_interval_seconds = polling_interval_seconds
 
     def execute(self, context: Context) -> str:
         hook = DataplexHook(
@@ -948,6 +958,24 @@ class DataplexRunDataQualityScanOperator(GoogleCloudBaseOperator):
             metadata=self.metadata,
         )
         job_id = result.job.name.split("/")[-1]
+
+        if self.deferrable:
+            if self.asynchronous:
+                raise AirflowException(
+                    "Both asynchronous and deferrable parameters were passed. Please, provide only one."
+                )
+            self.defer(
+                trigger=DataplexDataQualityJobTrigger(
+                    job_id=job_id,
+                    data_scan_id=self.data_scan_id,
+                    project_id=self.project_id,
+                    region=self.region,
+                    gcp_conn_id=self.gcp_conn_id,
+                    impersonation_chain=self.impersonation_chain,
+                    polling_interval_seconds=self.polling_interval_seconds,
+                ),
+                method_name="execute_complete",
+            )
         if not self.asynchronous:
             job = hook.wait_for_data_scan_job(
                 job_id=job_id,
@@ -973,6 +1001,31 @@ class DataplexRunDataQualityScanOperator(GoogleCloudBaseOperator):
 
         return job_id
 
+    def execute_complete(self, context, event=None) -> None:
+        """
+        Callback for when the trigger fires - returns immediately.
+
+        Relies on trigger to throw an exception, otherwise it assumes execution was
+        successful.
+        """
+        job_state = event["job_state"]
+        job_id = event["job_id"]
+        if job_state == DataScanJob.State.FAILED:
+            raise AirflowException(f"Job failed:\n{job_id}")
+        if job_state == DataScanJob.State.CANCELLED:
+            raise AirflowException(f"Job was cancelled:\n{job_id}")
+        if job_state == DataScanJob.State.SUCCEEDED:
+            job = event["job"]
+            if not job["data_quality_result"]["passed"]:
+                if self.fail_on_dq_failure:
+                    raise AirflowDataQualityScanException(
+                        f"Data Quality job {job_id} execution failed due to failure of its scanning "
+                        f"rules: {self.data_scan_id}"
+                    )
+            else:
+                self.log.info("Data Quality job executed successfully.")
+        return job_id
+
 
 class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
     """
@@ -1005,6 +1058,9 @@ class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
     :param result_timeout: Value in seconds for which operator will wait for the Data Quality scan result
         when the flag `wait_for_result = True`.
        Throws exception if there is no result found after specified amount of seconds.
+    :param polling_interval_seconds: time in seconds between polling for job completion.
+        The value is considered only when running in deferrable mode. Must be greater than 0.
+    :param deferrable: Run operator in the deferrable mode.
 
     :return: Dict representing DataScanJob.
         When the job completes with a successful status, information about the Data Quality result
@@ -1028,6 +1084,8 @@ class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
         fail_on_dq_failure: bool = False,
         wait_for_results: bool = True,
         result_timeout: float = 60.0 * 10,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        polling_interval_seconds: int = 10,
         *args,
         **kwargs,
     ) -> None:
@@ -1045,6 +1103,8 @@ class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
         self.fail_on_dq_failure = fail_on_dq_failure
         self.wait_for_results = wait_for_results
         self.result_timeout = result_timeout
+        self.deferrable = deferrable
+        self.polling_interval_seconds = polling_interval_seconds
 
     def execute(self, context: Context) -> dict:
         hook = DataplexHook(
@@ -1069,13 +1129,27 @@ class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
         self.job_id = job_id.split("/")[-1]
 
         if self.wait_for_results:
-            job = hook.wait_for_data_scan_job(
-                job_id=self.job_id,
-                data_scan_id=self.data_scan_id,
-                project_id=self.project_id,
-                region=self.region,
-                result_timeout=self.result_timeout,
-            )
+            if self.deferrable:
+                self.defer(
+                    trigger=DataplexDataQualityJobTrigger(
+                        job_id=self.job_id,
+                        data_scan_id=self.data_scan_id,
+                        project_id=self.project_id,
+                        region=self.region,
+                        gcp_conn_id=self.gcp_conn_id,
+                        impersonation_chain=self.impersonation_chain,
+                        polling_interval_seconds=self.polling_interval_seconds,
+                    ),
+                    method_name="execute_complete",
+                )
+            else:
+                job = hook.wait_for_data_scan_job(
+                    job_id=self.job_id,
+                    data_scan_id=self.data_scan_id,
+                    project_id=self.project_id,
+                    region=self.region,
+                    result_timeout=self.result_timeout,
+                )
         else:
             job = hook.get_data_scan_job(
                 project_id=self.project_id,
@@ -1104,6 +1178,34 @@ class DataplexGetDataQualityScanResultOperator(GoogleCloudBaseOperator):
 
         return result
 
+    def execute_complete(self, context, event=None) -> None:
+        """
+        Callback for when the trigger fires - returns immediately.
+
+        Relies on trigger to throw an exception, otherwise it assumes execution was
+        successful.
+        """
+        job_state = event["job_state"]
+        job_id = event["job_id"]
+        job = event["job"]
+        if job_state == DataScanJob.State.FAILED:
+            raise AirflowException(f"Job failed:\n{job_id}")
+        if job_state == DataScanJob.State.CANCELLED:
+            raise AirflowException(f"Job was cancelled:\n{job_id}")
+        if job_state == DataScanJob.State.SUCCEEDED:
+            if not job["data_quality_result"]["passed"]:
+                if self.fail_on_dq_failure:
+                    raise AirflowDataQualityScanException(
+                        f"Data Quality job {self.job_id} execution failed due to failure of its scanning "
+                        f"rules: {self.data_scan_id}"
+                    )
+            else:
+                self.log.info("Data Quality job executed successfully")
+        else:
+            self.log.info("Data Quality job execution returned status: %s", job_state)
+
+        return job
+
 
 class DataplexCreateZoneOperator(GoogleCloudBaseOperator):
     """
@@ -29,13 +29,10 @@ from datetime import datetime, timedelta
 from enum import Enum
 from typing import TYPE_CHECKING, Any, Sequence
 
-from google.api_core import operation  # type: ignore
 from google.api_core.exceptions import AlreadyExists, NotFound
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
 from google.api_core.retry import Retry, exponential_sleep_generator
 from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
-from google.protobuf.duration_pb2 import Duration
-from google.protobuf.field_mask_pb2 import FieldMask
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -64,6 +61,10 @@ from airflow.providers.google.cloud.triggers.dataproc import (
 from airflow.utils import timezone
 
 if TYPE_CHECKING:
+    from google.api_core import operation
+    from google.protobuf.duration_pb2 import Duration
+    from google.protobuf.field_mask_pb2 import FieldMask
+
     from airflow.utils.context import Context
 
 
@@ -244,12 +245,13 @@ class ClusterGenerator:
         return PreemptibilityType(preemptibility.upper())
 
     def _get_init_action_timeout(self) -> dict:
-        match = re.match(r"^(\d+)([sm])$", self.init_action_timeout)
+        match = re.fullmatch(r"(\d+)([sm])", self.init_action_timeout)
         if match:
-            val = float(match.group(1))
-            if match.group(2) == "s":
-                return {"seconds": int(val)}
-            elif match.group(2) == "m":
+            val = int(match.group(1))
+            unit = match.group(2)
+            if unit == "s":
+                return {"seconds": val}
+            elif unit == "m":
                 return {"seconds": int(timedelta(minutes=val).total_seconds())}
 
         raise AirflowException(
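The switch from re.match with ^...$ anchors to re.fullmatch is behavior-preserving for these patterns, and parsing the digits with int instead of float drops a needless conversion. A standalone sketch of the same parsing logic (not the shipped method), with quick checks:

    import re
    from datetime import timedelta

    def parse_init_action_timeout(value: str) -> int:
        # Same shape as ClusterGenerator._get_init_action_timeout above.
        match = re.fullmatch(r"(\d+)([sm])", value)
        if not match:
            raise ValueError(f"{value!r} should match (\\d+)(s|m)")
        val, unit = int(match.group(1)), match.group(2)
        return val if unit == "s" else int(timedelta(minutes=val).total_seconds())

    assert parse_init_action_timeout("90s") == 90
    assert parse_init_action_timeout("10m") == 600
    # re.fullmatch rejects partial matches exactly as the anchored pattern did:
    assert re.fullmatch(r"(\d+)([sm])", "10m extra") is None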
@@ -594,13 +596,17 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
         if cluster.status.state != cluster.status.State.ERROR:
             return
         self.log.info("Cluster is in ERROR state")
+        self.log.info("Gathering diagnostic information.")
         gcs_uri = hook.diagnose_cluster(
             region=self.region, cluster_name=self.cluster_name, project_id=self.project_id
         )
         self.log.info("Diagnostic information for cluster %s available at: %s", self.cluster_name, gcs_uri)
         if self.delete_on_error:
             self._delete_cluster(hook)
-            raise AirflowException("Cluster was created but was in ERROR state.")
+            # The delete op is asynchronous and can cause further failure if the cluster finishes
+            # deleting between catching AlreadyExists and checking state
+            self._wait_for_cluster_in_deleting_state(hook)
+            raise AirflowException("Cluster was created in an ERROR state then deleted.")
         raise AirflowException("Cluster was created but is in ERROR state")
 
     def _wait_for_cluster_in_deleting_state(self, hook: DataprocHook) -> None:
@@ -667,6 +673,22 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
667
673
  raise
668
674
  self.log.info("Cluster already exists.")
669
675
  cluster = self._get_cluster(hook)
676
+ except AirflowException as ae:
677
+ # There still could be a cluster created here in an ERROR state which
678
+ # should be deleted immediately rather than consuming another retry attempt
679
+ # (assuming delete_on_error is true (default))
680
+ # This reduces overall the number of task attempts from 3 to 2 to successful cluster creation
681
+ # assuming the underlying GCE issues have resolved within that window. Users can configure
682
+ # a higher number of retry attempts in powers of two with 30s-60s wait interval
683
+ try:
684
+ cluster = self._get_cluster(hook)
685
+ self._handle_error_state(hook, cluster)
686
+ except AirflowException as ae_inner:
687
+ # We could get any number of failures here, including cluster not found and we
688
+ # can just ignore to ensure we surface the original cluster create failure
689
+ self.log.error(ae_inner, exc_info=True)
690
+ finally:
691
+ raise ae
670
692
 
671
693
  # Check if cluster is not in ERROR state
672
694
  self._handle_error_state(hook, cluster)
@@ -790,18 +812,17 @@ class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
             return None
 
         timeout = None
-        match = re.match(r"^(\d+)([smdh])$", self.graceful_decommission_timeout)
+        match = re.fullmatch(r"(\d+)([smdh])", self.graceful_decommission_timeout)
         if match:
-            if match.group(2) == "s":
-                timeout = int(match.group(1))
-            elif match.group(2) == "m":
-                val = float(match.group(1))
+            val = int(match.group(1))
+            unit = match.group(2)
+            if unit == "s":
+                timeout = val
+            elif unit == "m":
                 timeout = int(timedelta(minutes=val).total_seconds())
-            elif match.group(2) == "h":
-                val = float(match.group(1))
+            elif unit == "h":
                 timeout = int(timedelta(hours=val).total_seconds())
-            elif match.group(2) == "d":
-                val = float(match.group(1))
+            elif unit == "d":
                 timeout = int(timedelta(days=val).total_seconds())
 
         if not timeout:
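The same refactor lands in DataprocScaleClusterOperator: one fullmatch, digits parsed once with int, then a branch per unit. A table-driven sketch of the equivalent conversion (an illustration, not the shipped code):

    import re
    from datetime import timedelta

    _UNITS = {"s": "seconds", "m": "minutes", "h": "hours", "d": "days"}

    def decommission_timeout_seconds(value: str) -> int | None:
        # Mirrors the graceful_decommission_timeout parsing above.
        match = re.fullmatch(r"(\d+)([smdh])", value)
        if not match:
            return None
        val, unit = int(match.group(1)), match.group(2)
        return int(timedelta(**{_UNITS[unit]: val}).total_seconds())

    assert decommission_timeout_seconds("30s") == 30
    assert decommission_timeout_seconds("2h") == 7200
    assert decommission_timeout_seconds("1d") == 86400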
@@ -26,7 +26,6 @@ from google.api_core.retry import Retry, exponential_sleep_generator
 from google.cloud.metastore_v1 import MetadataExport, MetadataManagementActivity
 from google.cloud.metastore_v1.types import Backup, MetadataImport, Service
 from google.cloud.metastore_v1.types.metastore import DatabaseDumpSpec, Restore
-from google.protobuf.field_mask_pb2 import FieldMask
 from googleapiclient.errors import HttpError
 
 from airflow import AirflowException
@@ -37,6 +36,8 @@ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseO
 from airflow.providers.google.common.links.storage import StorageLink
 
 if TYPE_CHECKING:
+    from google.protobuf.field_mask_pb2 import FieldMask
+
     from airflow.models.taskinstancekey import TaskInstanceKey
     from airflow.utils.context import Context
 
@@ -23,7 +23,6 @@ from typing import TYPE_CHECKING, Sequence
 
 from google.api_core.exceptions import AlreadyExists, InvalidArgument, NotFound
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.api_core.retry import Retry
 from google.cloud.dlp_v2.types import (
     ByteContentItem,
     ContentItem,
@@ -44,7 +43,6 @@ from google.cloud.dlp_v2.types import (
     StoredInfoType,
     StoredInfoTypeConfig,
 )
-from google.protobuf.field_mask_pb2 import FieldMask
 
 from airflow.providers.google.cloud.hooks.dlp import CloudDLPHook
 from airflow.providers.google.cloud.links.data_loss_prevention import (
@@ -63,6 +61,9 @@ from airflow.providers.google.cloud.links.data_loss_prevention import (
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 
 if TYPE_CHECKING:
+    from google.api_core.retry import Retry
+    from google.protobuf.field_mask_pb2 import FieldMask
+
     from airflow.utils.context import Context
 
 
@@ -51,54 +51,54 @@ def _validate_max_instances(value):
 
 
 CLOUD_FUNCTION_VALIDATION: list[dict[str, Any]] = [
-    dict(name="name", regexp="^.+$"),
-    dict(name="description", regexp="^.+$", optional=True),
-    dict(name="entryPoint", regexp=r"^.+$", optional=True),
-    dict(name="runtime", regexp=r"^.+$", optional=True),
-    dict(name="timeout", regexp=r"^.+$", optional=True),
-    dict(name="availableMemoryMb", custom_validation=_validate_available_memory_in_mb, optional=True),
-    dict(name="labels", optional=True),
-    dict(name="environmentVariables", optional=True),
-    dict(name="network", regexp=r"^.+$", optional=True),
-    dict(name="maxInstances", optional=True, custom_validation=_validate_max_instances),
-    dict(
-        name="source_code",
-        type="union",
-        fields=[
-            dict(name="sourceArchiveUrl", regexp=r"^.+$"),
-            dict(name="sourceRepositoryUrl", regexp=r"^.+$", api_version="v1beta2"),
-            dict(name="sourceRepository", type="dict", fields=[dict(name="url", regexp=r"^.+$")]),
-            dict(name="sourceUploadUrl"),
+    {"name": "name", "regexp": "^.+$"},
+    {"name": "description", "regexp": "^.+$", "optional": True},
+    {"name": "entryPoint", "regexp": r"^.+$", "optional": True},
+    {"name": "runtime", "regexp": r"^.+$", "optional": True},
+    {"name": "timeout", "regexp": r"^.+$", "optional": True},
+    {"name": "availableMemoryMb", "custom_validation": _validate_available_memory_in_mb, "optional": True},
+    {"name": "labels", "optional": True},
+    {"name": "environmentVariables", "optional": True},
+    {"name": "network", "regexp": r"^.+$", "optional": True},
+    {"name": "maxInstances", "optional": True, "custom_validation": _validate_max_instances},
+    {
+        "name": "source_code",
+        "type": "union",
+        "fields": [
+            {"name": "sourceArchiveUrl", "regexp": r"^.+$"},
+            {"name": "sourceRepositoryUrl", "regexp": r"^.+$", "api_version": "v1beta2"},
+            {"name": "sourceRepository", "type": "dict", "fields": [{"name": "url", "regexp": r"^.+$"}]},
+            {"name": "sourceUploadUrl"},
         ],
-    ),
-    dict(
-        name="trigger",
-        type="union",
-        fields=[
-            dict(
-                name="httpsTrigger",
-                type="dict",
-                fields=[
+    },
+    {
+        "name": "trigger",
+        "type": "union",
+        "fields": [
+            {
+                "name": "httpsTrigger",
+                "type": "dict",
+                "fields": [
                     # This dict should be empty at input (url is added at output)
                 ],
-            ),
-            dict(
-                name="eventTrigger",
-                type="dict",
-                fields=[
-                    dict(name="eventType", regexp=r"^.+$"),
-                    dict(name="resource", regexp=r"^.+$"),
-                    dict(name="service", regexp=r"^.+$", optional=True),
-                    dict(
-                        name="failurePolicy",
-                        type="dict",
-                        optional=True,
-                        fields=[dict(name="retry", type="dict", optional=True)],
-                    ),
+            },
+            {
+                "name": "eventTrigger",
+                "type": "dict",
+                "fields": [
+                    {"name": "eventType", "regexp": r"^.+$"},
+                    {"name": "resource", "regexp": r"^.+$"},
+                    {"name": "service", "regexp": r"^.+$", "optional": True},
+                    {
+                        "name": "failurePolicy",
+                        "type": "dict",
+                        "optional": True,
+                        "fields": [{"name": "retry", "type": "dict", "optional": True}],
+                    },
                 ],
-            ),
+            },
         ],
-    ),
+    },
 ]
 
 
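The functions.py change swaps dict(...) constructor calls for dict literals. The two spellings build equal dictionaries here; the literal form makes the string keys explicit and would also permit keys that are not valid Python identifiers. A one-line check:

    assert dict(name="name", regexp="^.+$") == {"name": "name", "regexp": "^.+$"}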
@@ -282,9 +282,9 @@ class ZipPathPreprocessor:
         if self._is_present_and_empty(self.body, GCF_SOURCE_UPLOAD_URL):
             if not self.zip_path:
                 raise AirflowException(
-                    "Parameter '{url}' is empty in the body and argument '{path}' "
-                    "is missing or empty. You need to have non empty '{path}' "
-                    "when '{url}' is present and empty.".format(url=GCF_SOURCE_UPLOAD_URL, path=GCF_ZIP_PATH)
+                    f"Parameter '{GCF_SOURCE_UPLOAD_URL}' is empty in the body and argument '{GCF_ZIP_PATH}' "
+                    f"is missing or empty. You need to have non empty '{GCF_ZIP_PATH}' "
+                    f"when '{GCF_SOURCE_UPLOAD_URL}' is present and empty."
                 )
 
     def _verify_upload_url_and_zip_path(self) -> None:
@@ -797,9 +797,8 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
                     num_max_attempts=self.download_num_attempts,
                 )
             except GoogleCloudError:
-                if self.download_continue_on_fail:
-                    continue
-                raise
+                if not self.download_continue_on_fail:
+                    raise
 
         self.log.info("Starting the transformation")
         cmd = [self.transform_script] if isinstance(self.transform_script, str) else self.transform_script
@@ -847,9 +846,8 @@ class GCSTimeSpanFileTransformOperator(GoogleCloudBaseOperator):
                 )
                 files_uploaded.append(str(upload_file_name))
             except GoogleCloudError:
-                if self.upload_continue_on_fail:
-                    continue
-                raise
+                if not self.upload_continue_on_fail:
+                    raise
 
         return files_uploaded
 
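Both gcs.py hunks invert the continue-on-fail check so the except block only decides whether to re-raise. Because the handler was the last statement in the loop body, the dropped continue was redundant and control flow is unchanged. A standalone sketch of the equivalence (blobs and fetch are placeholders):

    def download_all(blobs, fetch, continue_on_fail=True):
        fetched = []
        for blob in blobs:
            try:
                fetched.append(fetch(blob))
            except OSError:
                if not continue_on_fail:
                    raise
                # falling off the end of the loop body is an implicit continue
        return fetched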
@@ -24,7 +24,6 @@ from typing import TYPE_CHECKING, Any, Sequence
 
 from google.api_core.exceptions import AlreadyExists
 from google.cloud.container_v1.types import Cluster
-from kubernetes.client.models import V1Pod
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -46,6 +45,8 @@ from airflow.providers.google.cloud.triggers.kubernetes_engine import GKEOperati
 from airflow.utils.timezone import utcnow
 
 if TYPE_CHECKING:
+    from kubernetes.client.models import V1Pod
+
     from airflow.utils.context import Context
 
 KUBE_CONFIG_ENV_VAR = "KUBECONFIG"
@@ -21,14 +21,15 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Sequence, Tuple
 
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.api_core.retry import Retry
-from google.cloud.language_v1.types import Document, EncodingType
 from google.protobuf.json_format import MessageToDict
 
 from airflow.providers.google.cloud.hooks.natural_language import CloudNaturalLanguageHook
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 
 if TYPE_CHECKING:
+    from google.api_core.retry import Retry
+    from google.cloud.language_v1.types import Document, EncodingType
+
     from airflow.utils.context import Context
 
 
@@ -27,7 +27,6 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Any, Callable, Sequence
 
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.api_core.retry import Retry
 from google.cloud.pubsub_v1.types import (
     DeadLetterPolicy,
     Duration,
@@ -43,6 +42,8 @@ from airflow.providers.google.cloud.links.pubsub import PubSubSubscriptionLink,
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 
 if TYPE_CHECKING:
+    from google.api_core.retry import Retry
+
     from airflow.utils.context import Context
 
 
@@ -21,8 +21,6 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Sequence
 
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.api_core.retry import Retry
-from google.cloud.speech_v1.types import RecognitionConfig
 from google.protobuf.json_format import MessageToDict
 
 from airflow.exceptions import AirflowException
@@ -31,6 +29,9 @@ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseO
 from airflow.providers.google.common.links.storage import FileDetailsLink
 
 if TYPE_CHECKING:
+    from google.api_core.retry import Retry
+    from google.cloud.speech_v1.types import RecognitionConfig
+
     from airflow.utils.context import Context
 
 
@@ -20,7 +20,6 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Sequence
 
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.api_core.retry import Retry
 from google.cloud.monitoring_v3 import AlertPolicy, NotificationChannel
 
 from airflow.providers.google.cloud.hooks.stackdriver import StackdriverHook
@@ -31,6 +30,8 @@ from airflow.providers.google.cloud.links.stackdriver import (
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 
 if TYPE_CHECKING:
+    from google.api_core.retry import Retry
+
     from airflow.utils.context import Context
 
 
@@ -23,15 +23,16 @@ from typing import TYPE_CHECKING, Sequence, Tuple
 
 from google.api_core.exceptions import AlreadyExists
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.api_core.retry import Retry
 from google.cloud.tasks_v2.types import Queue, Task
-from google.protobuf.field_mask_pb2 import FieldMask
 
 from airflow.providers.google.cloud.hooks.tasks import CloudTasksHook
 from airflow.providers.google.cloud.links.cloud_tasks import CloudTasksLink, CloudTasksQueueLink
 from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseOperator
 
 if TYPE_CHECKING:
+    from google.api_core.retry import Retry
+    from google.protobuf.field_mask_pb2 import FieldMask
+
     from airflow.utils.context import Context
 
 
@@ -22,8 +22,6 @@ from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Sequence
 
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.api_core.retry import Retry
-from google.cloud.texttospeech_v1.types import AudioConfig, SynthesisInput, VoiceSelectionParams
 
 from airflow.exceptions import AirflowException
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
@@ -32,6 +30,9 @@ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseO
 from airflow.providers.google.common.links.storage import FileDetailsLink
 
 if TYPE_CHECKING:
+    from google.api_core.retry import Retry
+    from google.cloud.texttospeech_v1.types import AudioConfig, SynthesisInput, VoiceSelectionParams
+
     from airflow.utils.context import Context
 
 
@@ -20,7 +20,6 @@ from __future__ import annotations
 
 from typing import TYPE_CHECKING, Sequence
 
-from google.cloud.speech_v1.types import RecognitionAudio, RecognitionConfig
 from google.protobuf.json_format import MessageToDict
 
 from airflow.exceptions import AirflowException
@@ -30,6 +29,8 @@ from airflow.providers.google.cloud.operators.cloud_base import GoogleCloudBaseO
 from airflow.providers.google.common.links.storage import FileDetailsLink
 
 if TYPE_CHECKING:
+    from google.cloud.speech_v1.types import RecognitionAudio, RecognitionConfig
+
     from airflow.utils.context import Context
 
 