apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234) hide show
  1. airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
  2. airflow/providers/google/__init__.py +3 -3
  3. airflow/providers/google/ads/hooks/ads.py +39 -5
  4. airflow/providers/google/ads/operators/ads.py +2 -2
  5. airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
  6. airflow/providers/google/assets/gcs.py +1 -11
  7. airflow/providers/google/cloud/bundles/__init__.py +16 -0
  8. airflow/providers/google/cloud/bundles/gcs.py +161 -0
  9. airflow/providers/google/cloud/hooks/bigquery.py +166 -281
  10. airflow/providers/google/cloud/hooks/cloud_composer.py +287 -14
  11. airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
  12. airflow/providers/google/cloud/hooks/cloud_run.py +17 -9
  13. airflow/providers/google/cloud/hooks/cloud_sql.py +101 -22
  14. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +27 -6
  15. airflow/providers/google/cloud/hooks/compute_ssh.py +5 -1
  16. airflow/providers/google/cloud/hooks/datacatalog.py +9 -1
  17. airflow/providers/google/cloud/hooks/dataflow.py +71 -94
  18. airflow/providers/google/cloud/hooks/datafusion.py +1 -1
  19. airflow/providers/google/cloud/hooks/dataplex.py +1 -1
  20. airflow/providers/google/cloud/hooks/dataprep.py +1 -1
  21. airflow/providers/google/cloud/hooks/dataproc.py +72 -71
  22. airflow/providers/google/cloud/hooks/gcs.py +111 -14
  23. airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
  24. airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
  25. airflow/providers/google/cloud/hooks/looker.py +6 -1
  26. airflow/providers/google/cloud/hooks/mlengine.py +3 -2
  27. airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
  28. airflow/providers/google/cloud/hooks/spanner.py +73 -8
  29. airflow/providers/google/cloud/hooks/stackdriver.py +10 -8
  30. airflow/providers/google/cloud/hooks/translate.py +1 -1
  31. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -209
  32. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +2 -2
  33. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +27 -1
  34. airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
  35. airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +307 -7
  36. airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
  37. airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
  38. airflow/providers/google/cloud/hooks/vision.py +2 -2
  39. airflow/providers/google/cloud/hooks/workflows.py +1 -1
  40. airflow/providers/google/cloud/links/alloy_db.py +0 -46
  41. airflow/providers/google/cloud/links/base.py +77 -13
  42. airflow/providers/google/cloud/links/bigquery.py +0 -47
  43. airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
  44. airflow/providers/google/cloud/links/bigtable.py +0 -48
  45. airflow/providers/google/cloud/links/cloud_build.py +0 -73
  46. airflow/providers/google/cloud/links/cloud_functions.py +0 -33
  47. airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
  48. airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
  49. airflow/providers/google/cloud/links/cloud_sql.py +0 -33
  50. airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
  51. airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
  52. airflow/providers/google/cloud/links/compute.py +0 -58
  53. airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
  54. airflow/providers/google/cloud/links/datacatalog.py +23 -54
  55. airflow/providers/google/cloud/links/dataflow.py +0 -34
  56. airflow/providers/google/cloud/links/dataform.py +0 -64
  57. airflow/providers/google/cloud/links/datafusion.py +1 -96
  58. airflow/providers/google/cloud/links/dataplex.py +0 -154
  59. airflow/providers/google/cloud/links/dataprep.py +0 -24
  60. airflow/providers/google/cloud/links/dataproc.py +11 -95
  61. airflow/providers/google/cloud/links/datastore.py +0 -31
  62. airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
  63. airflow/providers/google/cloud/links/managed_kafka.py +0 -70
  64. airflow/providers/google/cloud/links/mlengine.py +0 -70
  65. airflow/providers/google/cloud/links/pubsub.py +0 -32
  66. airflow/providers/google/cloud/links/spanner.py +0 -33
  67. airflow/providers/google/cloud/links/stackdriver.py +0 -30
  68. airflow/providers/google/cloud/links/translate.py +17 -187
  69. airflow/providers/google/cloud/links/vertex_ai.py +28 -195
  70. airflow/providers/google/cloud/links/workflows.py +0 -52
  71. airflow/providers/google/cloud/log/gcs_task_handler.py +17 -9
  72. airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
  73. airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
  74. airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
  75. airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
  76. airflow/providers/google/cloud/openlineage/facets.py +102 -1
  77. airflow/providers/google/cloud/openlineage/mixins.py +10 -8
  78. airflow/providers/google/cloud/openlineage/utils.py +15 -1
  79. airflow/providers/google/cloud/operators/alloy_db.py +70 -55
  80. airflow/providers/google/cloud/operators/bigquery.py +73 -636
  81. airflow/providers/google/cloud/operators/bigquery_dts.py +3 -5
  82. airflow/providers/google/cloud/operators/bigtable.py +36 -7
  83. airflow/providers/google/cloud/operators/cloud_base.py +21 -1
  84. airflow/providers/google/cloud/operators/cloud_batch.py +2 -2
  85. airflow/providers/google/cloud/operators/cloud_build.py +75 -32
  86. airflow/providers/google/cloud/operators/cloud_composer.py +128 -40
  87. airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
  88. airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
  89. airflow/providers/google/cloud/operators/cloud_run.py +23 -5
  90. airflow/providers/google/cloud/operators/cloud_sql.py +8 -16
  91. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -11
  92. airflow/providers/google/cloud/operators/compute.py +8 -40
  93. airflow/providers/google/cloud/operators/datacatalog.py +157 -21
  94. airflow/providers/google/cloud/operators/dataflow.py +38 -15
  95. airflow/providers/google/cloud/operators/dataform.py +15 -5
  96. airflow/providers/google/cloud/operators/datafusion.py +41 -20
  97. airflow/providers/google/cloud/operators/dataplex.py +193 -109
  98. airflow/providers/google/cloud/operators/dataprep.py +1 -5
  99. airflow/providers/google/cloud/operators/dataproc.py +78 -35
  100. airflow/providers/google/cloud/operators/dataproc_metastore.py +96 -88
  101. airflow/providers/google/cloud/operators/datastore.py +22 -6
  102. airflow/providers/google/cloud/operators/dlp.py +6 -29
  103. airflow/providers/google/cloud/operators/functions.py +16 -7
  104. airflow/providers/google/cloud/operators/gcs.py +10 -8
  105. airflow/providers/google/cloud/operators/gen_ai.py +389 -0
  106. airflow/providers/google/cloud/operators/kubernetes_engine.py +60 -99
  107. airflow/providers/google/cloud/operators/looker.py +1 -1
  108. airflow/providers/google/cloud/operators/managed_kafka.py +107 -52
  109. airflow/providers/google/cloud/operators/natural_language.py +1 -1
  110. airflow/providers/google/cloud/operators/pubsub.py +60 -14
  111. airflow/providers/google/cloud/operators/spanner.py +25 -12
  112. airflow/providers/google/cloud/operators/speech_to_text.py +1 -2
  113. airflow/providers/google/cloud/operators/stackdriver.py +1 -9
  114. airflow/providers/google/cloud/operators/tasks.py +1 -12
  115. airflow/providers/google/cloud/operators/text_to_speech.py +1 -2
  116. airflow/providers/google/cloud/operators/translate.py +40 -16
  117. airflow/providers/google/cloud/operators/translate_speech.py +1 -2
  118. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
  119. airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +29 -9
  120. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +54 -26
  121. airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
  122. airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
  123. airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
  124. airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
  125. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
  126. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +11 -9
  127. airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
  128. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +30 -7
  129. airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
  130. airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
  131. airflow/providers/google/cloud/operators/vision.py +2 -2
  132. airflow/providers/google/cloud/operators/workflows.py +18 -15
  133. airflow/providers/google/cloud/sensors/bigquery.py +2 -2
  134. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -2
  135. airflow/providers/google/cloud/sensors/bigtable.py +11 -4
  136. airflow/providers/google/cloud/sensors/cloud_composer.py +533 -29
  137. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -2
  138. airflow/providers/google/cloud/sensors/dataflow.py +26 -9
  139. airflow/providers/google/cloud/sensors/dataform.py +2 -2
  140. airflow/providers/google/cloud/sensors/datafusion.py +4 -4
  141. airflow/providers/google/cloud/sensors/dataplex.py +2 -2
  142. airflow/providers/google/cloud/sensors/dataprep.py +2 -2
  143. airflow/providers/google/cloud/sensors/dataproc.py +2 -2
  144. airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -2
  145. airflow/providers/google/cloud/sensors/gcs.py +4 -4
  146. airflow/providers/google/cloud/sensors/looker.py +2 -2
  147. airflow/providers/google/cloud/sensors/pubsub.py +4 -4
  148. airflow/providers/google/cloud/sensors/tasks.py +2 -2
  149. airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
  150. airflow/providers/google/cloud/sensors/workflows.py +2 -2
  151. airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
  152. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
  153. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
  154. airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
  155. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +4 -4
  156. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
  157. airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
  158. airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
  159. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
  160. airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
  161. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -2
  162. airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +3 -3
  163. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +20 -12
  164. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
  165. airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
  166. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
  167. airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
  168. airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
  169. airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
  170. airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
  171. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  172. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
  173. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
  174. airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
  175. airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
  176. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +13 -4
  177. airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
  178. airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
  179. airflow/providers/google/cloud/triggers/bigquery.py +75 -34
  180. airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
  181. airflow/providers/google/cloud/triggers/cloud_composer.py +302 -46
  182. airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
  183. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +91 -1
  184. airflow/providers/google/cloud/triggers/dataflow.py +122 -0
  185. airflow/providers/google/cloud/triggers/datafusion.py +1 -1
  186. airflow/providers/google/cloud/triggers/dataplex.py +14 -2
  187. airflow/providers/google/cloud/triggers/dataproc.py +122 -52
  188. airflow/providers/google/cloud/triggers/kubernetes_engine.py +45 -27
  189. airflow/providers/google/cloud/triggers/mlengine.py +1 -1
  190. airflow/providers/google/cloud/triggers/pubsub.py +15 -19
  191. airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
  192. airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
  193. airflow/providers/google/cloud/utils/field_validator.py +1 -2
  194. airflow/providers/google/common/auth_backend/google_openid.py +4 -4
  195. airflow/providers/google/common/deprecated.py +2 -1
  196. airflow/providers/google/common/hooks/base_google.py +27 -8
  197. airflow/providers/google/common/links/storage.py +0 -22
  198. airflow/providers/google/common/utils/get_secret.py +31 -0
  199. airflow/providers/google/common/utils/id_token_credentials.py +3 -4
  200. airflow/providers/google/firebase/operators/firestore.py +2 -2
  201. airflow/providers/google/get_provider_info.py +56 -52
  202. airflow/providers/google/go_module_utils.py +35 -3
  203. airflow/providers/google/leveldb/hooks/leveldb.py +26 -1
  204. airflow/providers/google/leveldb/operators/leveldb.py +2 -2
  205. airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
  206. airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
  207. airflow/providers/google/marketing_platform/operators/analytics_admin.py +1 -2
  208. airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
  209. airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
  210. airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
  211. airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
  212. airflow/providers/google/marketing_platform/sensors/display_video.py +3 -63
  213. airflow/providers/google/suite/hooks/calendar.py +1 -1
  214. airflow/providers/google/suite/hooks/sheets.py +15 -1
  215. airflow/providers/google/suite/operators/sheets.py +8 -3
  216. airflow/providers/google/suite/sensors/drive.py +2 -2
  217. airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
  218. airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
  219. airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
  220. airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
  221. airflow/providers/google/version_compat.py +15 -1
  222. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +92 -48
  223. apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
  224. apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
  225. airflow/providers/google/cloud/hooks/automl.py +0 -673
  226. airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
  227. airflow/providers/google/cloud/links/automl.py +0 -193
  228. airflow/providers/google/cloud/operators/automl.py +0 -1362
  229. airflow/providers/google/cloud/operators/life_sciences.py +0 -119
  230. airflow/providers/google/cloud/operators/mlengine.py +0 -112
  231. apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
  232. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +0 -0
  233. {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
  234. {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
@@ -72,7 +72,7 @@ if TYPE_CHECKING:
72
72
  from google.protobuf.field_mask_pb2 import FieldMask
73
73
  from google.type.interval_pb2 import Interval
74
74
 
75
- from airflow.utils.context import Context
75
+ from airflow.providers.common.compat.sdk import Context
76
76
 
77
77
 
78
78
  class PreemptibilityType(Enum):
@@ -213,6 +213,7 @@ class ClusterGenerator:
213
213
  :param secondary_worker_accelerator_type: Type of the accelerator card (GPU) to attach to the secondary workers,
214
214
  see https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
215
215
  :param secondary_worker_accelerator_count: Number of accelerator cards (GPUs) to attach to the secondary workers
216
+ :param cluster_tier: The tier of the cluster (e.g. "CLUSTER_TIER_STANDARD" / "CLUSTER_TIER_PREMIUM").
216
217
  """
217
218
 
218
219
  def __init__(
@@ -261,6 +262,8 @@ class ClusterGenerator:
261
262
  secondary_worker_instance_flexibility_policy: InstanceFlexibilityPolicy | None = None,
262
263
  secondary_worker_accelerator_type: str | None = None,
263
264
  secondary_worker_accelerator_count: int | None = None,
265
+ *,
266
+ cluster_tier: str | None = None,
264
267
  **kwargs,
265
268
  ) -> None:
266
269
  self.project_id = project_id
@@ -308,6 +311,7 @@ class ClusterGenerator:
308
311
  self.secondary_worker_instance_flexibility_policy = secondary_worker_instance_flexibility_policy
309
312
  self.secondary_worker_accelerator_type = secondary_worker_accelerator_type
310
313
  self.secondary_worker_accelerator_count = secondary_worker_accelerator_count
314
+ self.cluster_tier = cluster_tier
311
315
 
312
316
  if self.custom_image and self.image_version:
313
317
  raise ValueError("The custom_image and image_version can't be both set")
@@ -513,6 +517,9 @@ class ClusterGenerator:
513
517
  if self.driver_pool_size > 0:
514
518
  cluster_data["auxiliary_node_groups"] = [self._build_driver_pool()]
515
519
 
520
+ if self.cluster_tier:
521
+ cluster_data["cluster_tier"] = self.cluster_tier
522
+
516
523
  cluster_data = self._build_gce_cluster_config(cluster_data)
517
524
 
518
525
  if self.single_node:
@@ -621,6 +628,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
621
628
  "virtual_cluster_config",
622
629
  "cluster_name",
623
630
  "labels",
631
+ "gcp_conn_id",
624
632
  "impersonation_chain",
625
633
  )
626
634
  template_fields_renderers = {"cluster_config": "json", "virtual_cluster_config": "json"}
@@ -807,7 +815,6 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
807
815
  if project_id:
808
816
  DataprocClusterLink.persist(
809
817
  context=context,
810
- operator=self,
811
818
  cluster_id=self.cluster_name,
812
819
  project_id=project_id,
813
820
  region=self.region,
@@ -908,7 +915,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
908
915
  cluster_state = event["cluster_state"]
909
916
  cluster_name = event["cluster_name"]
910
917
 
911
- if cluster_state == ClusterStatus.State.ERROR:
918
+ if cluster_state == ClusterStatus.State(ClusterStatus.State.DELETING).name:
912
919
  raise AirflowException(f"Cluster is in ERROR state:\n{cluster_name}")
913
920
 
914
921
  self.log.info("%s completed successfully.", self.task_id)
@@ -945,7 +952,13 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
945
952
  :param polling_interval_seconds: Time (seconds) to wait between calls to check the cluster status.
946
953
  """
947
954
 
948
- template_fields: Sequence[str] = ("project_id", "region", "cluster_name", "impersonation_chain")
955
+ template_fields: Sequence[str] = (
956
+ "project_id",
957
+ "region",
958
+ "cluster_name",
959
+ "gcp_conn_id",
960
+ "impersonation_chain",
961
+ )
949
962
 
950
963
  def __init__(
951
964
  self,
@@ -1071,6 +1084,7 @@ class _DataprocStartStopClusterBaseOperator(GoogleCloudBaseOperator):
1071
1084
  "region",
1072
1085
  "project_id",
1073
1086
  "request_id",
1087
+ "gcp_conn_id",
1074
1088
  "impersonation_chain",
1075
1089
  )
1076
1090
 
@@ -1174,7 +1188,6 @@ class DataprocStartClusterOperator(_DataprocStartStopClusterBaseOperator):
1174
1188
  cluster = super().execute(context)
1175
1189
  DataprocClusterLink.persist(
1176
1190
  context=context,
1177
- operator=self,
1178
1191
  cluster_id=self.cluster_name,
1179
1192
  project_id=self._get_project_id(),
1180
1193
  region=self.region,
@@ -1355,7 +1368,11 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
1355
1368
  self.log.info("Job %s submitted successfully.", job_id)
1356
1369
  # Save data required for extra links no matter what the job status will be
1357
1370
  DataprocLink.persist(
1358
- context=context, task_instance=self, url=DATAPROC_JOB_LINK_DEPRECATED, resource=job_id
1371
+ context=context,
1372
+ url=DATAPROC_JOB_LINK_DEPRECATED,
1373
+ resource=job_id,
1374
+ region=self.region,
1375
+ project_id=self.project_id,
1359
1376
  )
1360
1377
 
1361
1378
  if self.deferrable:
@@ -1413,7 +1430,7 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
1413
1430
  :param metadata: Additional metadata that is provided to the method.
1414
1431
  """
1415
1432
 
1416
- template_fields: Sequence[str] = ("region", "template")
1433
+ template_fields: Sequence[str] = ("region", "template", "gcp_conn_id")
1417
1434
  template_fields_renderers = {"template": "json"}
1418
1435
  operator_extra_links = (DataprocWorkflowTemplateLink(),)
1419
1436
 
@@ -1459,7 +1476,6 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
1459
1476
  if project_id:
1460
1477
  DataprocWorkflowTemplateLink.persist(
1461
1478
  context=context,
1462
- operator=self,
1463
1479
  workflow_template_id=self.template["id"],
1464
1480
  region=self.region,
1465
1481
  project_id=project_id,
@@ -1508,7 +1524,13 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
1508
1524
  :param cancel_on_kill: Flag which indicates whether cancel the workflow, when on_kill is called
1509
1525
  """
1510
1526
 
1511
- template_fields: Sequence[str] = ("template_id", "impersonation_chain", "request_id", "parameters")
1527
+ template_fields: Sequence[str] = (
1528
+ "template_id",
1529
+ "gcp_conn_id",
1530
+ "impersonation_chain",
1531
+ "request_id",
1532
+ "parameters",
1533
+ )
1512
1534
  template_fields_renderers = {"parameters": "json"}
1513
1535
  operator_extra_links = (DataprocWorkflowLink(),)
1514
1536
 
@@ -1571,7 +1593,6 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
1571
1593
  if project_id:
1572
1594
  DataprocWorkflowLink.persist(
1573
1595
  context=context,
1574
- operator=self,
1575
1596
  workflow_id=workflow_id,
1576
1597
  region=self.region,
1577
1598
  project_id=project_id,
@@ -1657,7 +1678,7 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
1657
1678
  :param cancel_on_kill: Flag which indicates whether cancel the workflow, when on_kill is called
1658
1679
  """
1659
1680
 
1660
- template_fields: Sequence[str] = ("template", "impersonation_chain")
1681
+ template_fields: Sequence[str] = ("template", "gcp_conn_id", "impersonation_chain")
1661
1682
  template_fields_renderers = {"template": "json"}
1662
1683
  operator_extra_links = (DataprocWorkflowLink(),)
1663
1684
 
@@ -1727,7 +1748,6 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
1727
1748
  if project_id:
1728
1749
  DataprocWorkflowLink.persist(
1729
1750
  context=context,
1730
- operator=self,
1731
1751
  workflow_id=workflow_id,
1732
1752
  region=self.region,
1733
1753
  project_id=project_id,
@@ -1826,7 +1846,14 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
1826
1846
  :param wait_timeout: How many seconds wait for job to be ready. Used only if ``asynchronous`` is False
1827
1847
  """
1828
1848
 
1829
- template_fields: Sequence[str] = ("project_id", "region", "job", "impersonation_chain", "request_id")
1849
+ template_fields: Sequence[str] = (
1850
+ "project_id",
1851
+ "region",
1852
+ "job",
1853
+ "gcp_conn_id",
1854
+ "impersonation_chain",
1855
+ "request_id",
1856
+ )
1830
1857
  template_fields_renderers = {"job": "json"}
1831
1858
 
1832
1859
  operator_extra_links = (DataprocJobLink(),)
@@ -1901,7 +1928,6 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
1901
1928
  if project_id:
1902
1929
  DataprocJobLink.persist(
1903
1930
  context=context,
1904
- operator=self,
1905
1931
  job_id=new_job_id,
1906
1932
  region=self.region,
1907
1933
  project_id=project_id,
@@ -1947,9 +1973,9 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
1947
1973
  job_state = event["job_state"]
1948
1974
  job_id = event["job_id"]
1949
1975
  job = event["job"]
1950
- if job_state == JobStatus.State.ERROR:
1976
+ if job_state == JobStatus.State.ERROR.name: # type: ignore
1951
1977
  raise AirflowException(f"Job {job_id} failed:\n{job}")
1952
- if job_state == JobStatus.State.CANCELLED:
1978
+ if job_state == JobStatus.State.CANCELLED.name: # type: ignore
1953
1979
  raise AirflowException(f"Job {job_id} was cancelled:\n{job}")
1954
1980
  self.log.info("%s completed successfully.", self.task_id)
1955
1981
  return job_id
@@ -2026,6 +2052,7 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
2026
2052
  "region",
2027
2053
  "request_id",
2028
2054
  "project_id",
2055
+ "gcp_conn_id",
2029
2056
  "impersonation_chain",
2030
2057
  )
2031
2058
  operator_extra_links = (DataprocClusterLink(),)
@@ -2074,7 +2101,6 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
2074
2101
  if project_id:
2075
2102
  DataprocClusterLink.persist(
2076
2103
  context=context,
2077
- operator=self,
2078
2104
  cluster_id=self.cluster_name,
2079
2105
  project_id=project_id,
2080
2106
  region=self.region,
@@ -2162,6 +2188,7 @@ class DataprocDiagnoseClusterOperator(GoogleCloudBaseOperator):
2162
2188
  "project_id",
2163
2189
  "region",
2164
2190
  "cluster_name",
2191
+ "gcp_conn_id",
2165
2192
  "impersonation_chain",
2166
2193
  "tarball_gcs_dir",
2167
2194
  "diagnosis_interval",
@@ -2308,6 +2335,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
2308
2335
  "batch",
2309
2336
  "batch_id",
2310
2337
  "region",
2338
+ "gcp_conn_id",
2311
2339
  "impersonation_chain",
2312
2340
  )
2313
2341
  operator_extra_links = (DataprocBatchLink(),)
@@ -2373,7 +2401,6 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
2373
2401
  # Persist the link earlier so users can observe the progress
2374
2402
  DataprocBatchLink.persist(
2375
2403
  context=context,
2376
- operator=self,
2377
2404
  project_id=self.project_id,
2378
2405
  region=self.region,
2379
2406
  batch_id=self.batch_id,
@@ -2410,7 +2437,6 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
2410
2437
 
2411
2438
  DataprocBatchLink.persist(
2412
2439
  context=context,
2413
- operator=self,
2414
2440
  project_id=self.project_id,
2415
2441
  region=self.region,
2416
2442
  batch_id=batch_id,
@@ -2460,7 +2486,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
2460
2486
  if not self.hook.check_error_for_resource_is_not_ready_msg(batch.state_message):
2461
2487
  break
2462
2488
 
2463
- self.handle_batch_status(context, batch.state, batch_id, batch.state_message)
2489
+ self.handle_batch_status(context, batch.state.name, batch_id, batch.state_message)
2464
2490
  return Batch.to_dict(batch)
2465
2491
 
2466
2492
  @cached_property
@@ -2485,21 +2511,21 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
2485
2511
  self.operation.cancel()
2486
2512
 
2487
2513
  def handle_batch_status(
2488
- self, context: Context, state: Batch.State, batch_id: str, state_message: str | None = None
2514
+ self, context: Context, state: str, batch_id: str, state_message: str | None = None
2489
2515
  ) -> None:
2490
2516
  # The existing batch may be a number of states other than 'SUCCEEDED'\
2491
2517
  # wait_for_operation doesn't fail if the job is cancelled, so we will check for it here which also
2492
2518
  # finds a cancelling|canceled|unspecified job from wait_for_batch or the deferred trigger
2493
2519
  link = DATAPROC_BATCH_LINK.format(region=self.region, project_id=self.project_id, batch_id=batch_id)
2494
- if state == Batch.State.FAILED:
2520
+ if state == Batch.State.FAILED.name: # type: ignore
2495
2521
  raise AirflowException(
2496
- f"Batch job {batch_id} failed with error: {state_message}\nDriver Logs: {link}"
2522
+ f"Batch job {batch_id} failed with error: {state_message}.\nDriver logs: {link}"
2497
2523
  )
2498
- if state in (Batch.State.CANCELLED, Batch.State.CANCELLING):
2499
- raise AirflowException(f"Batch job {batch_id} was cancelled. Driver logs: {link}")
2500
- if state == Batch.State.STATE_UNSPECIFIED:
2501
- raise AirflowException(f"Batch job {batch_id} unspecified. Driver logs: {link}")
2502
- self.log.info("Batch job %s completed. Driver logs: %s", batch_id, link)
2524
+ if state in (Batch.State.CANCELLED.name, Batch.State.CANCELLING.name): # type: ignore
2525
+ raise AirflowException(f"Batch job {batch_id} was cancelled.\nDriver logs: {link}")
2526
+ if state == Batch.State.STATE_UNSPECIFIED.name: # type: ignore
2527
+ raise AirflowException(f"Batch job {batch_id} unspecified.\nDriver logs: {link}")
2528
+ self.log.info("Batch job %s completed.\nDriver logs: %s", batch_id, link)
2503
2529
 
2504
2530
  def retry_batch_creation(
2505
2531
  self,
@@ -2571,7 +2597,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
2571
2597
  dag_id = re.sub(r"[.\s]", "_", self.dag_id.lower())
2572
2598
  task_id = re.sub(r"[.\s]", "_", self.task_id.lower())
2573
2599
 
2574
- labels_regex = re.compile(r"^[a-z][\w-]{0,63}$")
2600
+ labels_regex = re.compile(r"^[a-z][\w-]{0,62}$")
2575
2601
  if not labels_regex.match(dag_id) or not labels_regex.match(task_id):
2576
2602
  return
2577
2603
 
@@ -2618,7 +2644,13 @@ class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
2618
2644
  account from the list granting this role to the originating account (templated).
2619
2645
  """
2620
2646
 
2621
- template_fields: Sequence[str] = ("batch_id", "region", "project_id", "impersonation_chain")
2647
+ template_fields: Sequence[str] = (
2648
+ "batch_id",
2649
+ "region",
2650
+ "project_id",
2651
+ "gcp_conn_id",
2652
+ "impersonation_chain",
2653
+ )
2622
2654
 
2623
2655
  def __init__(
2624
2656
  self,
@@ -2682,7 +2714,13 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
2682
2714
  account from the list granting this role to the originating account (templated).
2683
2715
  """
2684
2716
 
2685
- template_fields: Sequence[str] = ("batch_id", "region", "project_id", "impersonation_chain")
2717
+ template_fields: Sequence[str] = (
2718
+ "batch_id",
2719
+ "region",
2720
+ "project_id",
2721
+ "gcp_conn_id",
2722
+ "impersonation_chain",
2723
+ )
2686
2724
  operator_extra_links = (DataprocBatchLink(),)
2687
2725
 
2688
2726
  def __init__(
@@ -2723,7 +2761,6 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
2723
2761
  if project_id:
2724
2762
  DataprocBatchLink.persist(
2725
2763
  context=context,
2726
- operator=self,
2727
2764
  project_id=project_id,
2728
2765
  region=self.region,
2729
2766
  batch_id=self.batch_id,
@@ -2759,7 +2796,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
2759
2796
  :param order_by: How to order results as specified in ListBatchesRequest
2760
2797
  """
2761
2798
 
2762
- template_fields: Sequence[str] = ("region", "project_id", "impersonation_chain")
2799
+ template_fields: Sequence[str] = ("region", "project_id", "gcp_conn_id", "impersonation_chain")
2763
2800
  operator_extra_links = (DataprocBatchesListLink(),)
2764
2801
 
2765
2802
  def __init__(
@@ -2806,7 +2843,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
2806
2843
  )
2807
2844
  project_id = self.project_id or hook.project_id
2808
2845
  if project_id:
2809
- DataprocBatchesListLink.persist(context=context, operator=self, project_id=project_id)
2846
+ DataprocBatchesListLink.persist(context=context, project_id=project_id)
2810
2847
  return [Batch.to_dict(result) for result in results]
2811
2848
 
2812
2849
 
@@ -2833,7 +2870,13 @@ class DataprocCancelOperationOperator(GoogleCloudBaseOperator):
2833
2870
  account from the list granting this role to the originating account (templated).
2834
2871
  """
2835
2872
 
2836
- template_fields: Sequence[str] = ("operation_name", "region", "project_id", "impersonation_chain")
2873
+ template_fields: Sequence[str] = (
2874
+ "operation_name",
2875
+ "region",
2876
+ "project_id",
2877
+ "gcp_conn_id",
2878
+ "impersonation_chain",
2879
+ )
2837
2880
 
2838
2881
  def __init__(
2839
2882
  self,