apache-airflow-providers-google 14.0.0__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
Files changed (278)
  1. airflow/providers/google/3rd-party-licenses/LICENSES.txt +14 -0
  2. airflow/providers/google/3rd-party-licenses/NOTICE +5 -0
  3. airflow/providers/google/__init__.py +3 -3
  4. airflow/providers/google/_vendor/__init__.py +0 -0
  5. airflow/providers/google/_vendor/json_merge_patch.py +91 -0
  6. airflow/providers/google/ads/hooks/ads.py +52 -43
  7. airflow/providers/google/ads/operators/ads.py +2 -2
  8. airflow/providers/google/ads/transfers/ads_to_gcs.py +3 -19
  9. airflow/providers/google/assets/gcs.py +1 -11
  10. airflow/providers/google/cloud/_internal_client/secret_manager_client.py +3 -2
  11. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  12. airflow/providers/google/cloud/hooks/alloy_db.py +2 -3
  13. airflow/providers/google/cloud/hooks/bigquery.py +195 -318
  14. airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
  15. airflow/providers/google/cloud/hooks/bigtable.py +3 -2
  16. airflow/providers/google/cloud/hooks/cloud_batch.py +8 -9
  17. airflow/providers/google/cloud/hooks/cloud_build.py +6 -65
  18. airflow/providers/google/cloud/hooks/cloud_composer.py +292 -24
  19. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  20. airflow/providers/google/cloud/hooks/cloud_memorystore.py +4 -3
  21. airflow/providers/google/cloud/hooks/cloud_run.py +20 -11
  22. airflow/providers/google/cloud/hooks/cloud_sql.py +136 -64
  23. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +35 -15
  24. airflow/providers/google/cloud/hooks/compute.py +7 -6
  25. airflow/providers/google/cloud/hooks/compute_ssh.py +7 -4
  26. airflow/providers/google/cloud/hooks/datacatalog.py +12 -3
  27. airflow/providers/google/cloud/hooks/dataflow.py +87 -242
  28. airflow/providers/google/cloud/hooks/dataform.py +9 -14
  29. airflow/providers/google/cloud/hooks/datafusion.py +7 -9
  30. airflow/providers/google/cloud/hooks/dataplex.py +13 -12
  31. airflow/providers/google/cloud/hooks/dataprep.py +2 -2
  32. airflow/providers/google/cloud/hooks/dataproc.py +76 -74
  33. airflow/providers/google/cloud/hooks/dataproc_metastore.py +4 -3
  34. airflow/providers/google/cloud/hooks/dlp.py +5 -4
  35. airflow/providers/google/cloud/hooks/gcs.py +144 -33
  36. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  37. airflow/providers/google/cloud/hooks/kms.py +3 -2
  38. airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -17
  39. airflow/providers/google/cloud/hooks/looker.py +6 -1
  40. airflow/providers/google/cloud/hooks/managed_kafka.py +227 -3
  41. airflow/providers/google/cloud/hooks/mlengine.py +7 -8
  42. airflow/providers/google/cloud/hooks/natural_language.py +3 -2
  43. airflow/providers/google/cloud/hooks/os_login.py +3 -2
  44. airflow/providers/google/cloud/hooks/pubsub.py +6 -6
  45. airflow/providers/google/cloud/hooks/secret_manager.py +105 -12
  46. airflow/providers/google/cloud/hooks/spanner.py +75 -10
  47. airflow/providers/google/cloud/hooks/speech_to_text.py +3 -2
  48. airflow/providers/google/cloud/hooks/stackdriver.py +18 -18
  49. airflow/providers/google/cloud/hooks/tasks.py +4 -3
  50. airflow/providers/google/cloud/hooks/text_to_speech.py +3 -2
  51. airflow/providers/google/cloud/hooks/translate.py +8 -17
  52. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -222
  53. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +9 -15
  54. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +33 -283
  55. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +5 -12
  56. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +6 -12
  57. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  58. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +311 -10
  59. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  60. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +7 -13
  61. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +8 -12
  62. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +6 -12
  63. airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +3 -2
  64. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  65. airflow/providers/google/cloud/hooks/video_intelligence.py +3 -2
  66. airflow/providers/google/cloud/hooks/vision.py +7 -7
  67. airflow/providers/google/cloud/hooks/workflows.py +4 -3
  68. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  69. airflow/providers/google/cloud/links/base.py +77 -7
  70. airflow/providers/google/cloud/links/bigquery.py +0 -47
  71. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  72. airflow/providers/google/cloud/links/bigtable.py +0 -48
  73. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  74. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  75. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  76. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  77. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  78. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -46
  79. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  80. airflow/providers/google/cloud/links/compute.py +0 -58
  81. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  82. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  83. airflow/providers/google/cloud/links/dataflow.py +0 -34
  84. airflow/providers/google/cloud/links/dataform.py +0 -64
  85. airflow/providers/google/cloud/links/datafusion.py +1 -90
  86. airflow/providers/google/cloud/links/dataplex.py +0 -154
  87. airflow/providers/google/cloud/links/dataprep.py +0 -24
  88. airflow/providers/google/cloud/links/dataproc.py +11 -89
  89. airflow/providers/google/cloud/links/datastore.py +0 -31
  90. airflow/providers/google/cloud/links/kubernetes_engine.py +11 -61
  91. airflow/providers/google/cloud/links/managed_kafka.py +11 -51
  92. airflow/providers/google/cloud/links/mlengine.py +0 -70
  93. airflow/providers/google/cloud/links/pubsub.py +0 -32
  94. airflow/providers/google/cloud/links/spanner.py +0 -33
  95. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  96. airflow/providers/google/cloud/links/translate.py +17 -187
  97. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  98. airflow/providers/google/cloud/links/workflows.py +0 -52
  99. airflow/providers/google/cloud/log/gcs_task_handler.py +166 -118
  100. airflow/providers/google/cloud/log/stackdriver_task_handler.py +14 -9
  101. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  102. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  103. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  104. airflow/providers/google/cloud/openlineage/facets.py +141 -40
  105. airflow/providers/google/cloud/openlineage/mixins.py +14 -13
  106. airflow/providers/google/cloud/openlineage/utils.py +19 -3
  107. airflow/providers/google/cloud/operators/alloy_db.py +76 -61
  108. airflow/providers/google/cloud/operators/bigquery.py +104 -667
  109. airflow/providers/google/cloud/operators/bigquery_dts.py +12 -12
  110. airflow/providers/google/cloud/operators/bigtable.py +38 -7
  111. airflow/providers/google/cloud/operators/cloud_base.py +22 -1
  112. airflow/providers/google/cloud/operators/cloud_batch.py +18 -18
  113. airflow/providers/google/cloud/operators/cloud_build.py +80 -36
  114. airflow/providers/google/cloud/operators/cloud_composer.py +157 -71
  115. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  116. airflow/providers/google/cloud/operators/cloud_memorystore.py +74 -46
  117. airflow/providers/google/cloud/operators/cloud_run.py +39 -20
  118. airflow/providers/google/cloud/operators/cloud_sql.py +46 -61
  119. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -14
  120. airflow/providers/google/cloud/operators/compute.py +18 -50
  121. airflow/providers/google/cloud/operators/datacatalog.py +167 -29
  122. airflow/providers/google/cloud/operators/dataflow.py +38 -15
  123. airflow/providers/google/cloud/operators/dataform.py +19 -7
  124. airflow/providers/google/cloud/operators/datafusion.py +43 -43
  125. airflow/providers/google/cloud/operators/dataplex.py +212 -126
  126. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  127. airflow/providers/google/cloud/operators/dataproc.py +134 -207
  128. airflow/providers/google/cloud/operators/dataproc_metastore.py +102 -84
  129. airflow/providers/google/cloud/operators/datastore.py +22 -6
  130. airflow/providers/google/cloud/operators/dlp.py +24 -45
  131. airflow/providers/google/cloud/operators/functions.py +21 -14
  132. airflow/providers/google/cloud/operators/gcs.py +15 -12
  133. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  134. airflow/providers/google/cloud/operators/kubernetes_engine.py +115 -106
  135. airflow/providers/google/cloud/operators/looker.py +1 -1
  136. airflow/providers/google/cloud/operators/managed_kafka.py +362 -40
  137. airflow/providers/google/cloud/operators/natural_language.py +5 -3
  138. airflow/providers/google/cloud/operators/pubsub.py +69 -21
  139. airflow/providers/google/cloud/operators/spanner.py +53 -45
  140. airflow/providers/google/cloud/operators/speech_to_text.py +5 -4
  141. airflow/providers/google/cloud/operators/stackdriver.py +5 -11
  142. airflow/providers/google/cloud/operators/tasks.py +6 -15
  143. airflow/providers/google/cloud/operators/text_to_speech.py +4 -3
  144. airflow/providers/google/cloud/operators/translate.py +46 -20
  145. airflow/providers/google/cloud/operators/translate_speech.py +4 -3
  146. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +44 -34
  147. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +34 -12
  148. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +62 -53
  149. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +75 -11
  150. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +48 -12
  151. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  152. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  153. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
  154. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +16 -12
  155. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +62 -14
  156. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +35 -10
  157. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  158. airflow/providers/google/cloud/operators/video_intelligence.py +5 -3
  159. airflow/providers/google/cloud/operators/vision.py +7 -5
  160. airflow/providers/google/cloud/operators/workflows.py +24 -19
  161. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  162. airflow/providers/google/cloud/sensors/bigquery.py +2 -2
  163. airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -4
  164. airflow/providers/google/cloud/sensors/bigtable.py +14 -6
  165. airflow/providers/google/cloud/sensors/cloud_composer.py +535 -33
  166. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -5
  167. airflow/providers/google/cloud/sensors/dataflow.py +27 -10
  168. airflow/providers/google/cloud/sensors/dataform.py +2 -2
  169. airflow/providers/google/cloud/sensors/datafusion.py +4 -4
  170. airflow/providers/google/cloud/sensors/dataplex.py +7 -5
  171. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  172. airflow/providers/google/cloud/sensors/dataproc.py +10 -9
  173. airflow/providers/google/cloud/sensors/dataproc_metastore.py +4 -3
  174. airflow/providers/google/cloud/sensors/gcs.py +22 -21
  175. airflow/providers/google/cloud/sensors/looker.py +5 -5
  176. airflow/providers/google/cloud/sensors/pubsub.py +20 -20
  177. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  178. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
  179. airflow/providers/google/cloud/sensors/workflows.py +6 -4
  180. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  181. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  182. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
  183. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  184. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +14 -13
  185. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  186. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  187. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  188. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  189. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  190. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +18 -22
  191. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -5
  192. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +45 -38
  193. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
  194. airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
  195. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
  196. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  197. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  198. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  199. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  200. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  201. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  202. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +44 -12
  203. airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
  204. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  205. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +36 -14
  206. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  207. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  208. airflow/providers/google/cloud/triggers/bigquery.py +75 -34
  209. airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
  210. airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
  211. airflow/providers/google/cloud/triggers/cloud_build.py +3 -2
  212. airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
  213. airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
  214. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +96 -5
  215. airflow/providers/google/cloud/triggers/dataflow.py +125 -2
  216. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  217. airflow/providers/google/cloud/triggers/dataplex.py +16 -3
  218. airflow/providers/google/cloud/triggers/dataproc.py +124 -53
  219. airflow/providers/google/cloud/triggers/kubernetes_engine.py +46 -28
  220. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  221. airflow/providers/google/cloud/triggers/pubsub.py +17 -20
  222. airflow/providers/google/cloud/triggers/vertex_ai.py +8 -7
  223. airflow/providers/google/cloud/utils/bigquery.py +5 -7
  224. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  225. airflow/providers/google/cloud/utils/credentials_provider.py +4 -3
  226. airflow/providers/google/cloud/utils/dataform.py +1 -1
  227. airflow/providers/google/cloud/utils/external_token_supplier.py +0 -1
  228. airflow/providers/google/cloud/utils/field_validator.py +1 -2
  229. airflow/providers/google/cloud/utils/validators.py +43 -0
  230. airflow/providers/google/common/auth_backend/google_openid.py +26 -9
  231. airflow/providers/google/common/consts.py +2 -1
  232. airflow/providers/google/common/deprecated.py +2 -1
  233. airflow/providers/google/common/hooks/base_google.py +40 -43
  234. airflow/providers/google/common/hooks/operation_helpers.py +78 -0
  235. airflow/providers/google/common/links/storage.py +0 -22
  236. airflow/providers/google/common/utils/get_secret.py +31 -0
  237. airflow/providers/google/common/utils/id_token_credentials.py +4 -5
  238. airflow/providers/google/firebase/operators/firestore.py +2 -2
  239. airflow/providers/google/get_provider_info.py +61 -216
  240. airflow/providers/google/go_module_utils.py +35 -3
  241. airflow/providers/google/leveldb/hooks/leveldb.py +30 -6
  242. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  243. airflow/providers/google/marketing_platform/hooks/analytics_admin.py +3 -2
  244. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  245. airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
  246. airflow/providers/google/marketing_platform/links/analytics_admin.py +4 -5
  247. airflow/providers/google/marketing_platform/operators/analytics_admin.py +7 -6
  248. airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
  249. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  250. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  251. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  252. airflow/providers/google/marketing_platform/sensors/display_video.py +4 -64
  253. airflow/providers/google/suite/hooks/calendar.py +1 -1
  254. airflow/providers/google/suite/hooks/drive.py +2 -2
  255. airflow/providers/google/suite/hooks/sheets.py +15 -1
  256. airflow/providers/google/suite/operators/sheets.py +8 -3
  257. airflow/providers/google/suite/sensors/drive.py +2 -2
  258. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
  259. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  260. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  261. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  262. airflow/providers/google/version_compat.py +15 -1
  263. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +117 -72
  264. apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
  265. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +1 -1
  266. apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
  267. airflow/providers/google/cloud/example_dags/example_cloud_task.py +0 -54
  268. airflow/providers/google/cloud/hooks/automl.py +0 -679
  269. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  270. airflow/providers/google/cloud/links/automl.py +0 -193
  271. airflow/providers/google/cloud/operators/automl.py +0 -1360
  272. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  273. airflow/providers/google/cloud/operators/mlengine.py +0 -1515
  274. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
  275. apache_airflow_providers_google-14.0.0.dist-info/RECORD +0 -318
  276. /airflow/providers/google/cloud/{example_dags → bundles}/__init__.py +0 -0
  277. {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
  278. {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
--- a/airflow/providers/google/cloud/operators/dataproc.py
+++ b/airflow/providers/google/cloud/operators/dataproc.py
@@ -30,6 +30,11 @@ from enum import Enum
 from functools import cached_property
 from typing import TYPE_CHECKING, Any
 
+from google.api_core.exceptions import AlreadyExists, NotFound
+from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
+from google.api_core.retry import Retry, exponential_sleep_generator
+from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
+
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.hooks.dataproc import (
@@ -39,7 +44,6 @@ from airflow.providers.google.cloud.hooks.dataproc import (
 )
 from airflow.providers.google.cloud.links.dataproc import (
     DATAPROC_BATCH_LINK,
-    DATAPROC_CLUSTER_LINK_DEPRECATED,
     DATAPROC_JOB_LINK_DEPRECATED,
     DataprocBatchesListLink,
     DataprocBatchLink,
@@ -58,22 +62,18 @@ from airflow.providers.google.cloud.triggers.dataproc import (
     DataprocSubmitTrigger,
 )
 from airflow.providers.google.cloud.utils.dataproc import DataprocOperationType
-from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.utils import timezone
-from google.api_core.exceptions import AlreadyExists, NotFound
-from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
-from google.api_core.retry import Retry, exponential_sleep_generator
-from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
 
 if TYPE_CHECKING:
-    from airflow.utils.context import Context
     from google.api_core import operation
     from google.api_core.retry_async import AsyncRetry
     from google.protobuf.duration_pb2 import Duration
     from google.protobuf.field_mask_pb2 import FieldMask
     from google.type.interval_pb2 import Interval
 
+    from airflow.providers.common.compat.sdk import Context
+
 
 class PreemptibilityType(Enum):
     """Contains possible Type values of Preemptibility applicable for every secondary worker of Cluster."""
@@ -213,6 +213,7 @@ class ClusterGenerator:
     :param secondary_worker_accelerator_type: Type of the accelerator card (GPU) to attach to the secondary workers,
         see https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
     :param secondary_worker_accelerator_count: Number of accelerator cards (GPUs) to attach to the secondary workers
+    :param cluster_tier: The tier of the cluster (e.g. "CLUSTER_TIER_STANDARD" / "CLUSTER_TIER_PREMIUM").
     """
 
     def __init__(
@@ -261,6 +262,8 @@ class ClusterGenerator:
         secondary_worker_instance_flexibility_policy: InstanceFlexibilityPolicy | None = None,
         secondary_worker_accelerator_type: str | None = None,
         secondary_worker_accelerator_count: int | None = None,
+        *,
+        cluster_tier: str | None = None,
         **kwargs,
     ) -> None:
         self.project_id = project_id
@@ -308,6 +311,7 @@ class ClusterGenerator:
         self.secondary_worker_instance_flexibility_policy = secondary_worker_instance_flexibility_policy
         self.secondary_worker_accelerator_type = secondary_worker_accelerator_type
         self.secondary_worker_accelerator_count = secondary_worker_accelerator_count
+        self.cluster_tier = cluster_tier
 
         if self.custom_image and self.image_version:
             raise ValueError("The custom_image and image_version can't be both set")
@@ -340,7 +344,7 @@ class ClusterGenerator:
             unit = match.group(2)
             if unit == "s":
                 return {"seconds": val}
-            elif unit == "m":
+            if unit == "m":
                 return {"seconds": int(timedelta(minutes=val).total_seconds())}
 
         raise AirflowException(
@@ -513,6 +517,9 @@ class ClusterGenerator:
         if self.driver_pool_size > 0:
             cluster_data["auxiliary_node_groups"] = [self._build_driver_pool()]
 
+        if self.cluster_tier:
+            cluster_data["cluster_tier"] = self.cluster_tier
+
         cluster_data = self._build_gce_cluster_config(cluster_data)
 
         if self.single_node:
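Editor's note: taken together, the `cluster_tier` hunks above add a keyword-only option to `ClusterGenerator` that is copied straight into the generated cluster config. A minimal usage sketch; the project ID and machine types are placeholder values, not taken from this diff:

```python
from airflow.providers.google.cloud.operators.dataproc import ClusterGenerator

# Hypothetical values throughout; cluster_tier is keyword-only per the
# __init__ change above and ends up in cluster_data["cluster_tier"].
cluster_config = ClusterGenerator(
    project_id="my-project",
    num_workers=2,
    master_machine_type="n1-standard-4",
    worker_machine_type="n1-standard-4",
    cluster_tier="CLUSTER_TIER_PREMIUM",
).make()
```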
@@ -621,6 +628,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
         "virtual_cluster_config",
         "cluster_name",
         "labels",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     template_fields_renderers = {"cluster_config": "json", "virtual_cluster_config": "json"}
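Editor's note: this is the first of many hunks below that add `"gcp_conn_id"` to `template_fields`, which makes the connection ID Jinja-templated at runtime. A hedged sketch of what that enables (the per-environment connection naming is a made-up example):

```python
from airflow.providers.google.cloud.operators.dataproc import DataprocCreateClusterOperator

# "gcp_conn_id" now renders through Jinja like any other templated field.
create_cluster = DataprocCreateClusterOperator(
    task_id="create_cluster",
    cluster_name="cluster-1",
    region="us-central1",                         # placeholder
    project_id="my-project",                      # placeholder
    gcp_conn_id="google_cloud_{{ params.env }}",  # hypothetical per-env connection
)
```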
@@ -807,7 +815,6 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
         if project_id:
             DataprocClusterLink.persist(
                 context=context,
-                operator=self,
                 cluster_id=self.cluster_name,
                 project_id=project_id,
                 region=self.region,
@@ -822,26 +829,24 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
                 )
                 self.log.info("Cluster created.")
                 return Cluster.to_dict(cluster)
-            else:
-                cluster = hook.get_cluster(
-                    project_id=self.project_id, region=self.region, cluster_name=self.cluster_name
-                )
-                if cluster.status.state == cluster.status.State.RUNNING:
-                    self.log.info("Cluster created.")
-                    return Cluster.to_dict(cluster)
-                else:
-                    self.defer(
-                        trigger=DataprocClusterTrigger(
-                            cluster_name=self.cluster_name,
-                            project_id=self.project_id,
-                            region=self.region,
-                            gcp_conn_id=self.gcp_conn_id,
-                            impersonation_chain=self.impersonation_chain,
-                            polling_interval_seconds=self.polling_interval_seconds,
-                            delete_on_error=self.delete_on_error,
-                        ),
-                        method_name="execute_complete",
-                    )
+            cluster = hook.get_cluster(
+                project_id=self.project_id, region=self.region, cluster_name=self.cluster_name
+            )
+            if cluster.status.state == cluster.status.State.RUNNING:
+                self.log.info("Cluster created.")
+                return Cluster.to_dict(cluster)
+            self.defer(
+                trigger=DataprocClusterTrigger(
+                    cluster_name=self.cluster_name,
+                    project_id=self.project_id,
+                    region=self.region,
+                    gcp_conn_id=self.gcp_conn_id,
+                    impersonation_chain=self.impersonation_chain,
+                    polling_interval_seconds=self.polling_interval_seconds,
+                    delete_on_error=self.delete_on_error,
+                ),
+                method_name="execute_complete",
+            )
         except AlreadyExists:
             if not self.use_if_exists:
                 raise
@@ -910,152 +915,13 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
         cluster_state = event["cluster_state"]
         cluster_name = event["cluster_name"]
 
-        if cluster_state == ClusterStatus.State.ERROR:
+        if cluster_state == ClusterStatus.State(ClusterStatus.State.DELETING).name:
             raise AirflowException(f"Cluster is in ERROR state:\n{cluster_name}")
 
         self.log.info("%s completed successfully.", self.task_id)
         return event["cluster"]
 
 
-# TODO: Remove one day
-@deprecated(
-    planned_removal_date="March 01, 2025",
-    use_instead="DataprocUpdateClusterOperator",
-    category=AirflowProviderDeprecationWarning,
-)
-class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
-    """
-    Scale, up or down, a cluster on Google Cloud Dataproc.
-
-    The operator will wait until the cluster is re-scaled.
-
-    Example usage:
-
-    .. code-block:: python
-
-        t1 = DataprocClusterScaleOperator(
-            task_id="dataproc_scale",
-            project_id="my-project",
-            cluster_name="cluster-1",
-            num_workers=10,
-            num_preemptible_workers=10,
-            graceful_decommission_timeout="1h",
-        )
-
-    .. seealso::
-        For more detail on about scaling clusters have a look at the reference:
-        https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters
-
-    :param cluster_name: The name of the cluster to scale. (templated)
-    :param project_id: The ID of the google cloud project in which
-        the cluster runs. (templated)
-    :param region: The region for the dataproc cluster. (templated)
-    :param num_workers: The new number of workers
-    :param num_preemptible_workers: The new number of preemptible workers
-    :param graceful_decommission_timeout: Timeout for graceful YARN decommissioning.
-        Maximum value is 1d
-    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
-    :param impersonation_chain: Optional service account to impersonate using short-term
-        credentials, or chained list of accounts required to get the access_token
-        of the last account in the list, which will be impersonated in the request.
-        If set as a string, the account must grant the originating account
-        the Service Account Token Creator IAM role.
-        If set as a sequence, the identities from the list must grant
-        Service Account Token Creator IAM role to the directly preceding identity, with first
-        account from the list granting this role to the originating account (templated).
-    """
-
-    template_fields: Sequence[str] = ("cluster_name", "project_id", "region", "impersonation_chain")
-
-    operator_extra_links = (DataprocLink(),)
-
-    def __init__(
-        self,
-        *,
-        cluster_name: str,
-        project_id: str = PROVIDE_PROJECT_ID,
-        region: str = "global",
-        num_workers: int = 2,
-        num_preemptible_workers: int = 0,
-        graceful_decommission_timeout: str | None = None,
-        gcp_conn_id: str = "google_cloud_default",
-        impersonation_chain: str | Sequence[str] | None = None,
-        **kwargs,
-    ) -> None:
-        super().__init__(**kwargs)
-        self.project_id = project_id
-        self.region = region
-        self.cluster_name = cluster_name
-        self.num_workers = num_workers
-        self.num_preemptible_workers = num_preemptible_workers
-        self.graceful_decommission_timeout = graceful_decommission_timeout
-        self.gcp_conn_id = gcp_conn_id
-        self.impersonation_chain = impersonation_chain
-
-    def _build_scale_cluster_data(self) -> dict:
-        scale_data = {
-            "config": {
-                "worker_config": {"num_instances": self.num_workers},
-                "secondary_worker_config": {"num_instances": self.num_preemptible_workers},
-            }
-        }
-        return scale_data
-
-    @property
-    def _graceful_decommission_timeout_object(self) -> dict[str, int] | None:
-        if not self.graceful_decommission_timeout:
-            return None
-
-        timeout = None
-        match = re.fullmatch(r"(\d+)([smdh])", self.graceful_decommission_timeout)
-        if match:
-            val = int(match.group(1))
-            unit = match.group(2)
-            if unit == "s":
-                timeout = val
-            elif unit == "m":
-                timeout = int(timedelta(minutes=val).total_seconds())
-            elif unit == "h":
-                timeout = int(timedelta(hours=val).total_seconds())
-            elif unit == "d":
-                timeout = int(timedelta(days=val).total_seconds())
-
-        if not timeout:
-            raise AirflowException(
-                "DataprocClusterScaleOperator "
-                " should be expressed in day, hours, minutes or seconds. "
-                " i.e. 1d, 4h, 10m, 30s"
-            )
-
-        return {"seconds": timeout}
-
-    def execute(self, context: Context) -> None:
-        """Scale, up or down, a cluster on Google Cloud Dataproc."""
-        self.log.info("Scaling cluster: %s", self.cluster_name)
-
-        scaling_cluster_data = self._build_scale_cluster_data()
-        update_mask = ["config.worker_config.num_instances", "config.secondary_worker_config.num_instances"]
-
-        hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
-        # Save data required to display extra link no matter what the cluster status will be
-        DataprocLink.persist(
-            context=context,
-            task_instance=self,
-            url=DATAPROC_CLUSTER_LINK_DEPRECATED,
-            resource=self.cluster_name,
-        )
-        operation = hook.update_cluster(
-            project_id=self.project_id,
-            region=self.region,
-            cluster_name=self.cluster_name,
-            cluster=scaling_cluster_data,
-            graceful_decommission_timeout=self._graceful_decommission_timeout_object,
-            update_mask={"paths": update_mask},
-        )
-        operation.result()
-        self.log.info("Cluster scaling finished")
-
-
 class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
     """
     Delete a cluster in a project.
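Editor's note: with `DataprocScaleClusterOperator` removed, the deprecation notice's `use_instead` target is `DataprocUpdateClusterOperator`. A hedged migration sketch that mirrors the `update_cluster` payload the removed operator built internally; IDs, region, and sizes are placeholders:

```python
from airflow.providers.google.cloud.operators.dataproc import DataprocUpdateClusterOperator

# Equivalent of the removed scale operator: resize both worker pools in one update.
scale_cluster = DataprocUpdateClusterOperator(
    task_id="scale_cluster",
    project_id="my-project",   # placeholder
    region="us-central1",      # placeholder
    cluster_name="cluster-1",
    cluster={
        "config": {
            "worker_config": {"num_instances": 10},
            "secondary_worker_config": {"num_instances": 10},
        }
    },
    update_mask={
        "paths": [
            "config.worker_config.num_instances",
            "config.secondary_worker_config.num_instances",
        ]
    },
    graceful_decommission_timeout={"seconds": 3600},  # the old "1h" string form
)
```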
@@ -1086,7 +952,13 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
     :param polling_interval_seconds: Time (seconds) to wait between calls to check the cluster status.
     """
 
-    template_fields: Sequence[str] = ("project_id", "region", "cluster_name", "impersonation_chain")
+    template_fields: Sequence[str] = (
+        "project_id",
+        "region",
+        "cluster_name",
+        "gcp_conn_id",
+        "impersonation_chain",
+    )
 
     def __init__(
         self,
@@ -1161,7 +1033,7 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
         """
         if event and event["status"] == "error":
             raise AirflowException(event["message"])
-        elif event is None:
+        if event is None:
             raise AirflowException("No event received in trigger callback")
         self.log.info("Cluster deleted.")
 
@@ -1212,6 +1084,7 @@ class _DataprocStartStopClusterBaseOperator(GoogleCloudBaseOperator):
         "region",
         "project_id",
         "request_id",
+        "gcp_conn_id",
         "impersonation_chain",
     )
 
@@ -1315,7 +1188,6 @@ class DataprocStartClusterOperator(_DataprocStartStopClusterBaseOperator):
         cluster = super().execute(context)
         DataprocClusterLink.persist(
             context=context,
-            operator=self,
             cluster_id=self.cluster_name,
             project_id=self._get_project_id(),
             region=self.region,
@@ -1461,8 +1333,7 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
         """Initialize `self.job_template` with default values."""
         if self.project_id is None:
             raise AirflowException(
-                "project id should either be set via project_id "
-                "parameter or retrieved from the connection,"
+                "project id should either be set via project_id parameter or retrieved from the connection,"
             )
         job_template = DataProcJobBuilder(
             project_id=self.project_id,
@@ -1497,7 +1368,11 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
             self.log.info("Job %s submitted successfully.", job_id)
             # Save data required for extra links no matter what the job status will be
             DataprocLink.persist(
-                context=context, task_instance=self, url=DATAPROC_JOB_LINK_DEPRECATED, resource=job_id
+                context=context,
+                url=DATAPROC_JOB_LINK_DEPRECATED,
+                resource=job_id,
+                region=self.region,
+                project_id=self.project_id,
             )
 
             if self.deferrable:
@@ -1517,8 +1392,7 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
             self.hook.wait_for_job(job_id=job_id, region=self.region, project_id=self.project_id)
             self.log.info("Job %s completed successfully.", job_id)
             return job_id
-        else:
-            raise AirflowException("Create a job template before")
+        raise AirflowException("Create a job template before")
 
     def execute_complete(self, context, event=None) -> None:
         """
@@ -1556,7 +1430,7 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
     :param metadata: Additional metadata that is provided to the method.
     """
 
-    template_fields: Sequence[str] = ("region", "template")
+    template_fields: Sequence[str] = ("region", "template", "gcp_conn_id")
     template_fields_renderers = {"template": "json"}
     operator_extra_links = (DataprocWorkflowTemplateLink(),)
 
@@ -1602,7 +1476,6 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
         if project_id:
             DataprocWorkflowTemplateLink.persist(
                 context=context,
-                operator=self,
                 workflow_template_id=self.template["id"],
                 region=self.region,
                 project_id=project_id,
@@ -1651,7 +1524,13 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
     :param cancel_on_kill: Flag which indicates whether cancel the workflow, when on_kill is called
     """
 
-    template_fields: Sequence[str] = ("template_id", "impersonation_chain", "request_id", "parameters")
+    template_fields: Sequence[str] = (
+        "template_id",
+        "gcp_conn_id",
+        "impersonation_chain",
+        "request_id",
+        "parameters",
+    )
     template_fields_renderers = {"parameters": "json"}
     operator_extra_links = (DataprocWorkflowLink(),)
 
@@ -1714,7 +1593,6 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
         if project_id:
             DataprocWorkflowLink.persist(
                 context=context,
-                operator=self,
                 workflow_id=workflow_id,
                 region=self.region,
                 project_id=project_id,
@@ -1800,7 +1678,7 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
     :param cancel_on_kill: Flag which indicates whether cancel the workflow, when on_kill is called
     """
 
-    template_fields: Sequence[str] = ("template", "impersonation_chain")
+    template_fields: Sequence[str] = ("template", "gcp_conn_id", "impersonation_chain")
     template_fields_renderers = {"template": "json"}
     operator_extra_links = (DataprocWorkflowLink(),)
 
@@ -1870,7 +1748,6 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
         if project_id:
             DataprocWorkflowLink.persist(
                 context=context,
-                operator=self,
                 workflow_id=workflow_id,
                 region=self.region,
                 project_id=project_id,
@@ -1969,7 +1846,14 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
     :param wait_timeout: How many seconds wait for job to be ready. Used only if ``asynchronous`` is False
     """
 
-    template_fields: Sequence[str] = ("project_id", "region", "job", "impersonation_chain", "request_id")
+    template_fields: Sequence[str] = (
+        "project_id",
+        "region",
+        "job",
+        "gcp_conn_id",
+        "impersonation_chain",
+        "request_id",
+    )
     template_fields_renderers = {"job": "json"}
 
     operator_extra_links = (DataprocJobLink(),)
@@ -2044,7 +1928,6 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         if project_id:
             DataprocJobLink.persist(
                 context=context,
-                operator=self,
                 job_id=new_job_id,
                 region=self.region,
                 project_id=project_id,
@@ -2056,9 +1939,9 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
             state = job.status.state
             if state == JobStatus.State.DONE:
                 return self.job_id
-            elif state == JobStatus.State.ERROR:
+            if state == JobStatus.State.ERROR:
                 raise AirflowException(f"Job failed:\n{job}")
-            elif state == JobStatus.State.CANCELLED:
+            if state == JobStatus.State.CANCELLED:
                 raise AirflowException(f"Job was cancelled:\n{job}")
             self.defer(
                 trigger=DataprocSubmitTrigger(
@@ -2090,9 +1973,9 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
         job_state = event["job_state"]
         job_id = event["job_id"]
         job = event["job"]
-        if job_state == JobStatus.State.ERROR:
+        if job_state == JobStatus.State.ERROR.name:  # type: ignore
             raise AirflowException(f"Job {job_id} failed:\n{job}")
-        if job_state == JobStatus.State.CANCELLED:
+        if job_state == JobStatus.State.CANCELLED.name:  # type: ignore
             raise AirflowException(f"Job {job_id} was cancelled:\n{job}")
         self.log.info("%s completed successfully.", self.task_id)
         return job_id
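Editor's note: the switch to `.name` comparisons here (and in `handle_batch_status` further down, where the `state` parameter becomes `str`) appears to reflect that the trigger event now carries the state as a plain string rather than an enum. A quick illustration of the equivalence being relied on, using the same `google.cloud.dataproc_v1` enums this module imports:

```python
from google.cloud.dataproc_v1 import Batch, JobStatus

# Enum members expose their string names via .name, which is what the
# updated operators compare against the string state from the event.
assert JobStatus.State.ERROR.name == "ERROR"
assert JobStatus.State(JobStatus.State.CANCELLED).name == "CANCELLED"
assert Batch.State.FAILED.name == "FAILED"
```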
@@ -2169,6 +2052,7 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
         "region",
         "request_id",
         "project_id",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     operator_extra_links = (DataprocClusterLink(),)
@@ -2217,7 +2101,6 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
         if project_id:
             DataprocClusterLink.persist(
                 context=context,
-                operator=self,
                 cluster_id=self.cluster_name,
                 project_id=project_id,
                 region=self.region,
@@ -2305,6 +2188,7 @@ class DataprocDiagnoseClusterOperator(GoogleCloudBaseOperator):
         "project_id",
         "region",
         "cluster_name",
+        "gcp_conn_id",
         "impersonation_chain",
         "tarball_gcs_dir",
         "diagnosis_interval",
@@ -2451,6 +2335,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
         "batch",
         "batch_id",
         "region",
+        "gcp_conn_id",
         "impersonation_chain",
     )
     operator_extra_links = (DataprocBatchLink(),)
@@ -2516,7 +2401,6 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
         # Persist the link earlier so users can observe the progress
         DataprocBatchLink.persist(
             context=context,
-            operator=self,
             project_id=self.project_id,
             region=self.region,
             batch_id=self.batch_id,
@@ -2528,6 +2412,8 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
             self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
             self._inject_openlineage_properties_into_dataproc_batch(context)
 
+        self.__update_batch_labels()
+
         try:
             self.operation = self.hook.create_batch(
                 region=self.region,
@@ -2551,7 +2437,6 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
 
             DataprocBatchLink.persist(
                 context=context,
-                operator=self,
                 project_id=self.project_id,
                 region=self.region,
                 batch_id=batch_id,
@@ -2601,7 +2486,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
             if not self.hook.check_error_for_resource_is_not_ready_msg(batch.state_message):
                 break
 
-        self.handle_batch_status(context, batch.state, batch_id, batch.state_message)
+        self.handle_batch_status(context, batch.state.name, batch_id, batch.state_message)
         return Batch.to_dict(batch)
 
     @cached_property
@@ -2626,21 +2511,21 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
             self.operation.cancel()
 
     def handle_batch_status(
-        self, context: Context, state: Batch.State, batch_id: str, state_message: str | None = None
+        self, context: Context, state: str, batch_id: str, state_message: str | None = None
     ) -> None:
         # The existing batch may be a number of states other than 'SUCCEEDED'\
         # wait_for_operation doesn't fail if the job is cancelled, so we will check for it here which also
         # finds a cancelling|canceled|unspecified job from wait_for_batch or the deferred trigger
         link = DATAPROC_BATCH_LINK.format(region=self.region, project_id=self.project_id, batch_id=batch_id)
-        if state == Batch.State.FAILED:
+        if state == Batch.State.FAILED.name:  # type: ignore
             raise AirflowException(
-                f"Batch job {batch_id} failed with error: {state_message}\nDriver Logs: {link}"
+                f"Batch job {batch_id} failed with error: {state_message}.\nDriver logs: {link}"
             )
-        if state in (Batch.State.CANCELLED, Batch.State.CANCELLING):
-            raise AirflowException(f"Batch job {batch_id} was cancelled. Driver logs: {link}")
-        if state == Batch.State.STATE_UNSPECIFIED:
-            raise AirflowException(f"Batch job {batch_id} unspecified. Driver logs: {link}")
-        self.log.info("Batch job %s completed. Driver logs: %s", batch_id, link)
+        if state in (Batch.State.CANCELLED.name, Batch.State.CANCELLING.name):  # type: ignore
+            raise AirflowException(f"Batch job {batch_id} was cancelled.\nDriver logs: {link}")
+        if state == Batch.State.STATE_UNSPECIFIED.name:  # type: ignore
+            raise AirflowException(f"Batch job {batch_id} unspecified.\nDriver logs: {link}")
+        self.log.info("Batch job %s completed.\nDriver logs: %s", batch_id, link)
 
     def retry_batch_creation(
         self,
@@ -2708,6 +2593,31 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
             exc_info=e,
         )
 
+    def __update_batch_labels(self):
+        dag_id = re.sub(r"[.\s]", "_", self.dag_id.lower())
+        task_id = re.sub(r"[.\s]", "_", self.task_id.lower())
+
+        labels_regex = re.compile(r"^[a-z][\w-]{0,62}$")
+        if not labels_regex.match(dag_id) or not labels_regex.match(task_id):
+            return
+
+        labels_limit = 32
+        new_labels = {"airflow-dag-id": dag_id, "airflow-task-id": task_id}
+
+        if self._dag:
+            dag_display_name = re.sub(r"[.\s]", "_", self._dag.dag_display_name.lower())
+            if labels_regex.match(dag_id):
+                new_labels["airflow-dag-display-name"] = dag_display_name
+
+        if isinstance(self.batch, Batch):
+            if len(self.batch.labels) + len(new_labels) <= labels_limit:
+                self.batch.labels.update(new_labels)
+        elif "labels" not in self.batch:
+            self.batch["labels"] = new_labels
+        elif isinstance(self.batch.get("labels"), dict):
+            if len(self.batch["labels"]) + len(new_labels) <= labels_limit:
+                self.batch["labels"].update(new_labels)
+
 
 class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
     """
@@ -2734,7 +2644,13 @@ class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
         account from the list granting this role to the originating account (templated).
     """
 
-    template_fields: Sequence[str] = ("batch_id", "region", "project_id", "impersonation_chain")
+    template_fields: Sequence[str] = (
+        "batch_id",
+        "region",
+        "project_id",
+        "gcp_conn_id",
+        "impersonation_chain",
+    )
 
     def __init__(
         self,
@@ -2798,7 +2714,13 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
         account from the list granting this role to the originating account (templated).
     """
 
-    template_fields: Sequence[str] = ("batch_id", "region", "project_id", "impersonation_chain")
+    template_fields: Sequence[str] = (
+        "batch_id",
+        "region",
+        "project_id",
+        "gcp_conn_id",
+        "impersonation_chain",
+    )
     operator_extra_links = (DataprocBatchLink(),)
 
     def __init__(
@@ -2839,7 +2761,6 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
         if project_id:
             DataprocBatchLink.persist(
                 context=context,
-                operator=self,
                 project_id=project_id,
                 region=self.region,
                 batch_id=self.batch_id,
@@ -2875,7 +2796,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
     :param order_by: How to order results as specified in ListBatchesRequest
     """
 
-    template_fields: Sequence[str] = ("region", "project_id", "impersonation_chain")
+    template_fields: Sequence[str] = ("region", "project_id", "gcp_conn_id", "impersonation_chain")
    operator_extra_links = (DataprocBatchesListLink(),)
 
     def __init__(
@@ -2922,7 +2843,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
         )
         project_id = self.project_id or hook.project_id
         if project_id:
-            DataprocBatchesListLink.persist(context=context, operator=self, project_id=project_id)
+            DataprocBatchesListLink.persist(context=context, project_id=project_id)
         return [Batch.to_dict(result) for result in results]
 
 
@@ -2949,7 +2870,13 @@ class DataprocCancelOperationOperator(GoogleCloudBaseOperator):
         account from the list granting this role to the originating account (templated).
     """
 
-    template_fields: Sequence[str] = ("operation_name", "region", "project_id", "impersonation_chain")
+    template_fields: Sequence[str] = (
+        "operation_name",
+        "region",
+        "project_id",
+        "gcp_conn_id",
+        "impersonation_chain",
+    )
 
     def __init__(
         self,