apache-airflow-providers-google 10.14.0rc2__py3-none-any.whl → 10.15.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/cloud/hooks/automl.py +13 -13
  3. airflow/providers/google/cloud/hooks/bigquery.py +193 -246
  4. airflow/providers/google/cloud/hooks/bigquery_dts.py +6 -6
  5. airflow/providers/google/cloud/hooks/bigtable.py +8 -8
  6. airflow/providers/google/cloud/hooks/cloud_batch.py +1 -1
  7. airflow/providers/google/cloud/hooks/cloud_build.py +19 -20
  8. airflow/providers/google/cloud/hooks/cloud_composer.py +4 -4
  9. airflow/providers/google/cloud/hooks/cloud_memorystore.py +10 -10
  10. airflow/providers/google/cloud/hooks/cloud_run.py +1 -1
  11. airflow/providers/google/cloud/hooks/cloud_sql.py +17 -17
  12. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +3 -3
  13. airflow/providers/google/cloud/hooks/compute.py +16 -16
  14. airflow/providers/google/cloud/hooks/compute_ssh.py +1 -1
  15. airflow/providers/google/cloud/hooks/datacatalog.py +22 -22
  16. airflow/providers/google/cloud/hooks/dataflow.py +48 -49
  17. airflow/providers/google/cloud/hooks/dataform.py +16 -16
  18. airflow/providers/google/cloud/hooks/datafusion.py +15 -15
  19. airflow/providers/google/cloud/hooks/datapipeline.py +3 -3
  20. airflow/providers/google/cloud/hooks/dataplex.py +19 -19
  21. airflow/providers/google/cloud/hooks/dataprep.py +8 -8
  22. airflow/providers/google/cloud/hooks/dataproc.py +88 -0
  23. airflow/providers/google/cloud/hooks/dataproc_metastore.py +13 -13
  24. airflow/providers/google/cloud/hooks/datastore.py +3 -3
  25. airflow/providers/google/cloud/hooks/dlp.py +25 -25
  26. airflow/providers/google/cloud/hooks/gcs.py +25 -23
  27. airflow/providers/google/cloud/hooks/gdm.py +3 -3
  28. airflow/providers/google/cloud/hooks/kms.py +3 -3
  29. airflow/providers/google/cloud/hooks/kubernetes_engine.py +63 -48
  30. airflow/providers/google/cloud/hooks/life_sciences.py +13 -12
  31. airflow/providers/google/cloud/hooks/looker.py +7 -7
  32. airflow/providers/google/cloud/hooks/mlengine.py +12 -12
  33. airflow/providers/google/cloud/hooks/natural_language.py +2 -2
  34. airflow/providers/google/cloud/hooks/os_login.py +1 -1
  35. airflow/providers/google/cloud/hooks/pubsub.py +9 -9
  36. airflow/providers/google/cloud/hooks/secret_manager.py +1 -1
  37. airflow/providers/google/cloud/hooks/spanner.py +11 -11
  38. airflow/providers/google/cloud/hooks/speech_to_text.py +1 -1
  39. airflow/providers/google/cloud/hooks/stackdriver.py +7 -7
  40. airflow/providers/google/cloud/hooks/tasks.py +11 -11
  41. airflow/providers/google/cloud/hooks/text_to_speech.py +1 -1
  42. airflow/providers/google/cloud/hooks/translate.py +1 -1
  43. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +13 -13
  44. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +6 -6
  45. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +45 -50
  46. airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +13 -13
  47. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +9 -9
  48. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +128 -11
  49. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +10 -10
  50. airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +8 -8
  51. airflow/providers/google/cloud/hooks/video_intelligence.py +2 -2
  52. airflow/providers/google/cloud/hooks/vision.py +1 -1
  53. airflow/providers/google/cloud/hooks/workflows.py +10 -10
  54. airflow/providers/google/cloud/links/datafusion.py +12 -5
  55. airflow/providers/google/cloud/operators/bigquery.py +9 -11
  56. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +3 -1
  57. airflow/providers/google/cloud/operators/dataflow.py +16 -16
  58. airflow/providers/google/cloud/operators/datafusion.py +9 -1
  59. airflow/providers/google/cloud/operators/dataproc.py +298 -65
  60. airflow/providers/google/cloud/operators/kubernetes_engine.py +6 -6
  61. airflow/providers/google/cloud/operators/life_sciences.py +10 -9
  62. airflow/providers/google/cloud/operators/mlengine.py +96 -96
  63. airflow/providers/google/cloud/operators/pubsub.py +2 -0
  64. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +33 -3
  65. airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +59 -2
  66. airflow/providers/google/cloud/secrets/secret_manager.py +8 -7
  67. airflow/providers/google/cloud/sensors/bigquery.py +20 -16
  68. airflow/providers/google/cloud/sensors/cloud_composer.py +11 -8
  69. airflow/providers/google/cloud/sensors/gcs.py +8 -7
  70. airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +4 -4
  71. airflow/providers/google/cloud/transfers/gcs_to_sftp.py +1 -1
  72. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
  73. airflow/providers/google/cloud/transfers/mysql_to_gcs.py +1 -1
  74. airflow/providers/google/cloud/transfers/oracle_to_gcs.py +1 -1
  75. airflow/providers/google/cloud/transfers/postgres_to_gcs.py +1 -1
  76. airflow/providers/google/cloud/transfers/presto_to_gcs.py +1 -1
  77. airflow/providers/google/cloud/transfers/s3_to_gcs.py +3 -3
  78. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +1 -1
  79. airflow/providers/google/cloud/transfers/sql_to_gcs.py +3 -3
  80. airflow/providers/google/cloud/transfers/trino_to_gcs.py +1 -1
  81. airflow/providers/google/cloud/triggers/bigquery.py +12 -12
  82. airflow/providers/google/cloud/triggers/bigquery_dts.py +1 -1
  83. airflow/providers/google/cloud/triggers/cloud_batch.py +3 -1
  84. airflow/providers/google/cloud/triggers/cloud_build.py +2 -2
  85. airflow/providers/google/cloud/triggers/cloud_run.py +1 -1
  86. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +6 -6
  87. airflow/providers/google/cloud/triggers/dataflow.py +3 -1
  88. airflow/providers/google/cloud/triggers/datafusion.py +2 -2
  89. airflow/providers/google/cloud/triggers/dataplex.py +2 -2
  90. airflow/providers/google/cloud/triggers/dataproc.py +2 -2
  91. airflow/providers/google/cloud/triggers/gcs.py +12 -8
  92. airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -2
  93. airflow/providers/google/cloud/triggers/mlengine.py +2 -2
  94. airflow/providers/google/cloud/triggers/pubsub.py +1 -1
  95. airflow/providers/google/cloud/triggers/vertex_ai.py +99 -0
  96. airflow/providers/google/cloud/utils/bigquery.py +2 -2
  97. airflow/providers/google/cloud/utils/credentials_provider.py +2 -2
  98. airflow/providers/google/cloud/utils/dataform.py +1 -1
  99. airflow/providers/google/cloud/utils/field_validator.py +2 -2
  100. airflow/providers/google/cloud/utils/helpers.py +2 -2
  101. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +1 -1
  102. airflow/providers/google/cloud/utils/mlengine_prediction_summary.py +1 -1
  103. airflow/providers/google/common/auth_backend/google_openid.py +2 -2
  104. airflow/providers/google/common/hooks/base_google.py +29 -22
  105. airflow/providers/google/common/hooks/discovery_api.py +2 -2
  106. airflow/providers/google/common/utils/id_token_credentials.py +5 -5
  107. airflow/providers/google/firebase/hooks/firestore.py +3 -3
  108. airflow/providers/google/get_provider_info.py +7 -2
  109. airflow/providers/google/leveldb/hooks/leveldb.py +2 -2
  110. airflow/providers/google/marketing_platform/hooks/analytics.py +11 -14
  111. airflow/providers/google/marketing_platform/hooks/campaign_manager.py +11 -11
  112. airflow/providers/google/marketing_platform/hooks/display_video.py +13 -13
  113. airflow/providers/google/marketing_platform/hooks/search_ads.py +4 -4
  114. airflow/providers/google/marketing_platform/operators/analytics.py +37 -32
  115. airflow/providers/google/suite/hooks/calendar.py +2 -2
  116. airflow/providers/google/suite/hooks/drive.py +7 -7
  117. airflow/providers/google/suite/hooks/sheets.py +8 -8
  118. {apache_airflow_providers_google-10.14.0rc2.dist-info → apache_airflow_providers_google-10.15.0rc1.dist-info}/METADATA +11 -11
  119. {apache_airflow_providers_google-10.14.0rc2.dist-info → apache_airflow_providers_google-10.15.0rc1.dist-info}/RECORD +121 -120
  120. {apache_airflow_providers_google-10.14.0rc2.dist-info → apache_airflow_providers_google-10.15.0rc1.dist-info}/WHEEL +0 -0
  121. {apache_airflow_providers_google-10.14.0rc2.dist-info → apache_airflow_providers_google-10.15.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/dataproc.py

@@ -31,6 +31,7 @@ from datetime import datetime, timedelta
 from enum import Enum
 from typing import TYPE_CHECKING, Any, Sequence

+from deprecated import deprecated
 from google.api_core.exceptions import AlreadyExists, NotFound
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
 from google.api_core.retry import Retry, exponential_sleep_generator
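
Throughout this release, per-class `warnings.warn(...)` calls in `__init__` are replaced by the class-level `@deprecated` decorator from the Deprecated package imported above. A minimal sketch of the pattern (the operator names below are placeholders, not part of the provider):

from deprecated import deprecated

from airflow.exceptions import AirflowProviderDeprecationWarning
from airflow.models import BaseOperator


@deprecated(
    reason="Please use `NewOperator` instead.",
    category=AirflowProviderDeprecationWarning,
)
class OldOperator(BaseOperator):
    """Instantiating this class now emits AirflowProviderDeprecationWarning via the decorator."""

    def execute(self, context):
        pass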
@@ -158,12 +159,18 @@ class ClusterGenerator:
         Valid values: ``pd-ssd`` (Persistent Disk Solid State Drive) or
         ``pd-standard`` (Persistent Disk Hard Disk Drive).
     :param master_disk_size: Disk size for the primary node
+    :param master_accelerator_type: Type of the accelerator card (GPU) to attach to the primary node,
+        see https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
+    :param master_accelerator_count: Number of accelerator cards (GPUs) to attach to the primary node
     :param worker_machine_type: Compute engine machine type to use for the worker nodes
     :param worker_disk_type: Type of the boot disk for the worker node
         (default is ``pd-standard``).
         Valid values: ``pd-ssd`` (Persistent Disk Solid State Drive) or
         ``pd-standard`` (Persistent Disk Hard Disk Drive).
     :param worker_disk_size: Disk size for the worker nodes
+    :param worker_accelerator_type: Type of the accelerator card (GPU) to attach to the worker nodes,
+        see https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
+    :param worker_accelerator_count: Number of accelerator cards (GPUs) to attach to the worker nodes
     :param num_preemptible_workers: The # of VM instances in the instance group as secondary workers
         inside the cluster with Preemptibility enabled by default.
         Note, that it is not possible to mix non-preemptible and preemptible secondary workers in
@@ -200,6 +207,9 @@ class ClusterGenerator:
         identify the driver group in future operations, such as resizing the node group.
     :param secondary_worker_instance_flexibility_policy: Instance flexibility Policy allowing a mixture of VM
         shapes and provisioning models.
+    :param secondary_worker_accelerator_type: Type of the accelerator card (GPU) to attach to the secondary workers,
+        see https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
+    :param secondary_worker_accelerator_count: Number of accelerator cards (GPUs) to attach to the secondary workers
     """

     def __init__(
@@ -227,9 +237,13 @@ class ClusterGenerator:
         master_machine_type: str = "n1-standard-4",
         master_disk_type: str = "pd-standard",
         master_disk_size: int = 1024,
+        master_accelerator_type: str | None = None,
+        master_accelerator_count: int | None = None,
         worker_machine_type: str = "n1-standard-4",
         worker_disk_type: str = "pd-standard",
         worker_disk_size: int = 1024,
+        worker_accelerator_type: str | None = None,
+        worker_accelerator_count: int | None = None,
         num_preemptible_workers: int = 0,
         preemptibility: str = PreemptibilityType.PREEMPTIBLE.value,
         service_account: str | None = None,
@@ -242,6 +256,8 @@ class ClusterGenerator:
         driver_pool_size: int = 0,
         driver_pool_id: str | None = None,
         secondary_worker_instance_flexibility_policy: InstanceFlexibilityPolicy | None = None,
+        secondary_worker_accelerator_type: str | None = None,
+        secondary_worker_accelerator_count: int | None = None,
         **kwargs,
     ) -> None:
         self.project_id = project_id
@@ -263,10 +279,14 @@ class ClusterGenerator:
         self.master_machine_type = master_machine_type
         self.master_disk_type = master_disk_type
         self.master_disk_size = master_disk_size
+        self.master_accelerator_type = master_accelerator_type
+        self.master_accelerator_count = master_accelerator_count
         self.autoscaling_policy = autoscaling_policy
         self.worker_machine_type = worker_machine_type
         self.worker_disk_type = worker_disk_type
         self.worker_disk_size = worker_disk_size
+        self.worker_accelerator_type = worker_accelerator_type
+        self.worker_accelerator_count = worker_accelerator_count
         self.zone = zone
         self.network_uri = network_uri
         self.subnetwork_uri = subnetwork_uri
@@ -283,6 +303,8 @@ class ClusterGenerator:
         self.driver_pool_size = driver_pool_size
         self.driver_pool_id = driver_pool_id
         self.secondary_worker_instance_flexibility_policy = secondary_worker_instance_flexibility_policy
+        self.secondary_worker_accelerator_type = secondary_worker_accelerator_type
+        self.secondary_worker_accelerator_count = secondary_worker_accelerator_count

         if self.custom_image and self.image_version:
             raise ValueError("The custom_image and image_version can't be both set")
@@ -339,10 +361,10 @@ class ClusterGenerator:
         if self.subnetwork_uri:
             cluster_data[config]["subnetwork_uri"] = self.subnetwork_uri

-        if self.internal_ip_only:
-            if not self.subnetwork_uri:
+        if self.internal_ip_only is not None:
+            if not self.subnetwork_uri and self.internal_ip_only:
                 raise AirflowException("Set internal_ip_only to true only when you pass a subnetwork_uri.")
-            cluster_data[config]["internal_ip_only"] = True
+            cluster_data[config]["internal_ip_only"] = self.internal_ip_only

         if self.tags:
             cluster_data[config]["tags"] = self.tags
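
The new accelerator parameters feed into the generated `master_config`, `worker_config` and `secondary_worker_config` (see the hunks below). A minimal usage sketch, with placeholder project, region and GPU values:

from airflow.providers.google.cloud.operators.dataproc import ClusterGenerator, DataprocCreateClusterOperator

# Build a cluster config dict with one GPU attached to the master and to each worker.
cluster_config = ClusterGenerator(
    project_id="my-project",
    num_workers=2,
    master_machine_type="n1-standard-4",
    master_accelerator_type="nvidia-tesla-t4",
    master_accelerator_count=1,
    worker_machine_type="n1-standard-4",
    worker_accelerator_type="nvidia-tesla-t4",
    worker_accelerator_count=1,
).make()

create_cluster = DataprocCreateClusterOperator(
    task_id="create_gpu_cluster",
    project_id="my-project",
    region="us-central1",
    cluster_name="gpu-cluster",
    cluster_config=cluster_config,
)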
@@ -423,6 +445,18 @@ class ClusterGenerator:
         if self.min_num_workers:
             cluster_data["worker_config"]["min_num_instances"] = self.min_num_workers

+        if self.master_accelerator_type:
+            cluster_data["master_config"]["accelerators"] = {
+                "accelerator_type_uri": self.master_accelerator_type,
+                "accelerator_count": self.master_accelerator_count,
+            }
+
+        if self.worker_accelerator_type:
+            cluster_data["worker_config"]["accelerators"] = {
+                "accelerator_type_uri": self.worker_accelerator_type,
+                "accelerator_count": self.worker_accelerator_count,
+            }
+
         if self.num_preemptible_workers > 0:
             cluster_data["secondary_worker_config"] = {
                 "num_instances": self.num_preemptible_workers,
@@ -434,6 +468,11 @@ class ClusterGenerator:
                 "is_preemptible": True,
                 "preemptibility": self.preemptibility.value,
             }
+            if self.worker_accelerator_type:
+                cluster_data["secondary_worker_config"]["accelerators"] = {
+                    "accelerator_type_uri": self.secondary_worker_accelerator_type,
+                    "accelerator_count": self.secondary_worker_accelerator_count,
+                }
             if self.secondary_worker_instance_flexibility_policy:
                 cluster_data["secondary_worker_config"]["instance_flexibility_policy"] = {
                     "instance_selection_list": [
@@ -724,6 +763,17 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
         cluster = self._get_cluster(hook)
         return cluster

+    def _start_cluster(self, hook: DataprocHook):
+        op: operation.Operation = hook.start_cluster(
+            region=self.region,
+            project_id=self.project_id,
+            cluster_name=self.cluster_name,
+            retry=self.retry,
+            timeout=self.timeout,
+            metadata=self.metadata,
+        )
+        return hook.wait_for_operation(timeout=self.timeout, result_retry=self.retry, operation=op)
+
     def execute(self, context: Context) -> dict:
         self.log.info("Creating cluster: %s", self.cluster_name)
         hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
@@ -801,6 +851,9 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
             # Create new cluster
             cluster = self._create_cluster(hook)
             self._handle_error_state(hook, cluster)
+        elif cluster.status.state == cluster.status.State.STOPPED:
+            # if the cluster exists and already stopped, then start the cluster
+            self._start_cluster(hook)

         return Cluster.to_dict(cluster)

@@ -820,6 +873,11 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
         return event["cluster"]


+# TODO: Remove one day
+@deprecated(
+    reason="Please use `DataprocUpdateClusterOperator` instead.",
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
     """Scale, up or down, a cluster on Google Cloud Dataproc.

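
The decorator above names `DataprocUpdateClusterOperator` as the replacement for the deprecated scale operator. A minimal sketch of scaling the primary worker group with it, assuming placeholder project, region and cluster values:

from airflow.providers.google.cloud.operators.dataproc import DataprocUpdateClusterOperator

scale_cluster = DataprocUpdateClusterOperator(
    task_id="scale_cluster",
    project_id="my-project",
    region="us-central1",
    cluster_name="my-cluster",
    # Resize the primary worker group to 5 instances.
    cluster={"config": {"worker_config": {"num_instances": 5}}},
    update_mask={"paths": ["config.worker_config.num_instances"]},
)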
@@ -888,14 +946,6 @@ class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain

-        # TODO: Remove one day
-        warnings.warn(
-            f"The `{type(self).__name__}` operator is deprecated, "
-            "please use `DataprocUpdateClusterOperator` instead.",
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
     def _build_scale_cluster_data(self) -> dict:
         scale_data = {
             "config": {
@@ -1082,6 +1132,189 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
         )


+class _DataprocStartStopClusterBaseOperator(GoogleCloudBaseOperator):
+    """Base class to start or stop a cluster in a project.
+
+    :param cluster_name: Required. Name of the cluster to create
+    :param region: Required. The specified region where the dataproc cluster is created.
+    :param project_id: Optional. The ID of the Google Cloud project the cluster belongs to.
+    :param cluster_uuid: Optional. Specifying the ``cluster_uuid`` means the RPC should fail
+        if cluster with specified UUID does not exist.
+    :param request_id: Optional. A unique id used to identify the request. If the server receives two
+        ``DeleteClusterRequest`` requests with the same id, then the second request will be ignored and the
+        first ``google.longrunning.Operation`` created and stored in the backend is returned.
+    :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
+        retried.
+    :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
+        ``retry`` is specified, the timeout applies to each individual attempt.
+    :param metadata: Additional metadata that is provided to the method.
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+
+    template_fields = (
+        "cluster_name",
+        "region",
+        "project_id",
+        "request_id",
+        "impersonation_chain",
+    )
+
+    def __init__(
+        self,
+        *,
+        cluster_name: str,
+        region: str,
+        project_id: str | None = None,
+        cluster_uuid: str | None = None,
+        request_id: str | None = None,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
+        timeout: float = 1 * 60 * 60,
+        metadata: Sequence[tuple[str, str]] = (),
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.project_id = project_id
+        self.region = region
+        self.cluster_name = cluster_name
+        self.cluster_uuid = cluster_uuid
+        self.request_id = request_id
+        self.retry = retry
+        self.timeout = timeout
+        self.metadata = metadata
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self._hook: DataprocHook | None = None
+
+    @property
+    def hook(self):
+        if self._hook is None:
+            self._hook = DataprocHook(
+                gcp_conn_id=self.gcp_conn_id,
+                impersonation_chain=self.impersonation_chain,
+            )
+        return self._hook
+
+    def _get_project_id(self) -> str:
+        return self.project_id or self.hook.project_id
+
+    def _get_cluster(self) -> Cluster:
+        """Retrieve the cluster information.
+
+        :return: Instance of ``google.cloud.dataproc_v1.Cluster`` class
+        """
+        return self.hook.get_cluster(
+            project_id=self._get_project_id(),
+            region=self.region,
+            cluster_name=self.cluster_name,
+            retry=self.retry,
+            timeout=self.timeout,
+            metadata=self.metadata,
+        )
+
+    def _check_desired_cluster_state(self, cluster: Cluster) -> tuple[bool, str | None]:
+        """Implement this method in child class to return whether the cluster is in desired state or not.
+
+        If the cluster is in desired state you can return a log message content as a second value
+        for the return tuple.
+
+        :param cluster: Required. Instance of ``google.cloud.dataproc_v1.Cluster``
+            class to interact with Dataproc API
+        :return: Tuple of (Boolean, Optional[str]) The first value of the tuple is whether the cluster is
+            in desired state or not. The second value of the tuple will be used if you want to log something when
+            the cluster is in desired state already.
+        """
+        raise NotImplementedError
+
+    def _get_operation(self) -> operation.Operation:
+        """Implement this method in child class to call the related hook method and return its result.
+
+        :return: ``google.api_core.operation.Operation`` value whether the cluster is in desired state or not
+        """
+        raise NotImplementedError
+
+    def execute(self, context: Context) -> dict | None:
+        cluster: Cluster = self._get_cluster()
+        is_already_desired_state, log_str = self._check_desired_cluster_state(cluster)
+        if is_already_desired_state:
+            self.log.info(log_str)
+            return None
+
+        op: operation.Operation = self._get_operation()
+        result = self.hook.wait_for_operation(timeout=self.timeout, result_retry=self.retry, operation=op)
+        return Cluster.to_dict(result)
+
+
+class DataprocStartClusterOperator(_DataprocStartStopClusterBaseOperator):
+    """Start a cluster in a project."""
+
+    operator_extra_links = (DataprocClusterLink(),)
+
+    def execute(self, context: Context) -> dict | None:
+        self.log.info("Starting the cluster: %s", self.cluster_name)
+        cluster = super().execute(context)
+        DataprocClusterLink.persist(
+            context=context,
+            operator=self,
+            cluster_id=self.cluster_name,
+            project_id=self._get_project_id(),
+            region=self.region,
+        )
+        self.log.info("Cluster started")
+        return cluster
+
+    def _check_desired_cluster_state(self, cluster: Cluster) -> tuple[bool, str | None]:
+        if cluster.status.state == cluster.status.State.RUNNING:
+            return True, f'The cluster "{self.cluster_name}" already running!'
+        return False, None
+
+    def _get_operation(self) -> operation.Operation:
+        return self.hook.start_cluster(
+            region=self.region,
+            project_id=self._get_project_id(),
+            cluster_name=self.cluster_name,
+            cluster_uuid=self.cluster_uuid,
+            retry=self.retry,
+            timeout=self.timeout,
+            metadata=self.metadata,
+        )
+
+
+class DataprocStopClusterOperator(_DataprocStartStopClusterBaseOperator):
+    """Stop a cluster in a project."""
+
+    def execute(self, context: Context) -> dict | None:
+        self.log.info("Stopping the cluster: %s", self.cluster_name)
+        cluster = super().execute(context)
+        self.log.info("Cluster stopped")
+        return cluster
+
+    def _check_desired_cluster_state(self, cluster: Cluster) -> tuple[bool, str | None]:
+        if cluster.status.state in [cluster.status.State.STOPPED, cluster.status.State.STOPPING]:
+            return True, f'The cluster "{self.cluster_name}" already stopped!'
+        return False, None
+
+    def _get_operation(self) -> operation.Operation:
+        return self.hook.stop_cluster(
+            region=self.region,
+            project_id=self._get_project_id(),
+            cluster_name=self.cluster_name,
+            cluster_uuid=self.cluster_uuid,
+            retry=self.retry,
+            timeout=self.timeout,
+            metadata=self.metadata,
+        )
+
+
 class DataprocJobBaseOperator(GoogleCloudBaseOperator):
     """Base class for operators that launch job on DataProc.

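
The new start/stop operators pair naturally with clusters that are kept around between runs. A minimal DAG-level sketch, with placeholder project, region and cluster values:

from airflow.providers.google.cloud.operators.dataproc import (
    DataprocStartClusterOperator,
    DataprocStopClusterOperator,
)

start_cluster = DataprocStartClusterOperator(
    task_id="start_cluster",
    project_id="my-project",
    region="us-central1",
    cluster_name="my-cluster",
)

stop_cluster = DataprocStopClusterOperator(
    task_id="stop_cluster",
    project_id="my-project",
    region="us-central1",
    cluster_name="my-cluster",
)

# The workload runs between the two, e.g. start_cluster >> submit_job >> stop_cluster.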
@@ -1256,6 +1489,15 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
             self.hook.cancel_job(project_id=self.project_id, job_id=self.dataproc_job_id, region=self.region)


+# TODO: Remove one day
+@deprecated(
+    reason=(
+        "Please use `DataprocSubmitJobOperator` instead. "
+        "You can use `generate_job` method to generate dictionary representing your job "
+        "and use it with the new operator."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
     """Start a Pig query Job on a Cloud DataProc cluster.

@@ -1330,15 +1572,6 @@ class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
         dataproc_jars: list[str] | None = None,
         **kwargs,
     ) -> None:
-        # TODO: Remove one day
-        warnings.warn(
-            "The `{cls}` operator is deprecated, please use `DataprocSubmitJobOperator` instead. You can use"
-            " `generate_job` method of `{cls}` to generate dictionary representing your job"
-            " and use it with the new operator.".format(cls=type(self).__name__),
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
         super().__init__(
            impersonation_chain=impersonation_chain,
            region=region,
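
The deprecation reason recommends `DataprocSubmitJobOperator`. A minimal migration sketch for a Pig job (project, region, cluster and query are placeholder values; `generate_job()` on the old operator can also be used to produce the job dict):

from airflow.providers.google.cloud.operators.dataproc import DataprocSubmitJobOperator

pig_job = {
    "reference": {"project_id": "my-project"},
    "placement": {"cluster_name": "my-cluster"},
    "pig_job": {"query_list": {"queries": ["define sin HiveUDF('sin');"]}},
}

submit_pig = DataprocSubmitJobOperator(
    task_id="submit_pig",
    project_id="my-project",
    region="us-central1",
    job=pig_job,
)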
@@ -1382,6 +1615,15 @@ class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
         super().execute(context)


+# TODO: Remove one day
+@deprecated(
+    reason=(
+        "Please use `DataprocSubmitJobOperator` instead. "
+        "You can use `generate_job` method to generate dictionary representing your job "
+        "and use it with the new operator."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator):
     """Start a Hive query Job on a Cloud DataProc cluster.

@@ -1422,15 +1664,6 @@ class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator):
         dataproc_jars: list[str] | None = None,
         **kwargs,
     ) -> None:
-        # TODO: Remove one day
-        warnings.warn(
-            "The `{cls}` operator is deprecated, please use `DataprocSubmitJobOperator` instead. You can use"
-            " `generate_job` method of `{cls}` to generate dictionary representing your job"
-            " and use it with the new operator.".format(cls=type(self).__name__),
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
         super().__init__(
            impersonation_chain=impersonation_chain,
            region=region,
@@ -1474,6 +1707,15 @@ class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator):
         super().execute(context)


+# TODO: Remove one day
+@deprecated(
+    reason=(
+        "Please use `DataprocSubmitJobOperator` instead. "
+        "You can use `generate_job` method to generate dictionary representing your job "
+        "and use it with the new operator."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator):
     """Start a Spark SQL query Job on a Cloud DataProc cluster.

@@ -1515,15 +1757,6 @@ class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator):
         dataproc_jars: list[str] | None = None,
         **kwargs,
     ) -> None:
-        # TODO: Remove one day
-        warnings.warn(
-            "The `{cls}` operator is deprecated, please use `DataprocSubmitJobOperator` instead. You can use"
-            " `generate_job` method of `{cls}` to generate dictionary representing your job"
-            " and use it with the new operator.".format(cls=type(self).__name__),
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
         super().__init__(
            impersonation_chain=impersonation_chain,
            region=region,
@@ -1565,6 +1798,15 @@ class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator):
         super().execute(context)


+# TODO: Remove one day
+@deprecated(
+    reason=(
+        "Please use `DataprocSubmitJobOperator` instead. "
+        "You can use `generate_job` method to generate dictionary representing your job "
+        "and use it with the new operator."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator):
     """Start a Spark Job on a Cloud DataProc cluster.

@@ -1610,15 +1852,6 @@ class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator):
         dataproc_jars: list[str] | None = None,
         **kwargs,
     ) -> None:
-        # TODO: Remove one day
-        warnings.warn(
-            "The `{cls}` operator is deprecated, please use `DataprocSubmitJobOperator` instead. You can use"
-            " `generate_job` method of `{cls}` to generate dictionary representing your job"
-            " and use it with the new operator.".format(cls=type(self).__name__),
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
         super().__init__(
            impersonation_chain=impersonation_chain,
            region=region,
@@ -1656,6 +1889,15 @@ class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator):
         super().execute(context)


+# TODO: Remove one day
+@deprecated(
+    reason=(
+        "Please use `DataprocSubmitJobOperator` instead. "
+        "You can use `generate_job` method to generate dictionary representing your job "
+        "and use it with the new operator."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
     """Start a Hadoop Job on a Cloud DataProc cluster.

@@ -1701,15 +1943,6 @@ class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
         dataproc_jars: list[str] | None = None,
         **kwargs,
     ) -> None:
-        # TODO: Remove one day
-        warnings.warn(
-            "The `{cls}` operator is deprecated, please use `DataprocSubmitJobOperator` instead. You can use"
-            " `generate_job` method of `{cls}` to generate dictionary representing your job"
-            " and use it with the new operator.".format(cls=type(self).__name__),
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
         super().__init__(
            impersonation_chain=impersonation_chain,
            region=region,
@@ -1746,6 +1979,15 @@ class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
         super().execute(context)


+# TODO: Remove one day
+@deprecated(
+    reason=(
+        "Please use `DataprocSubmitJobOperator` instead. "
+        "You can use `generate_job` method to generate dictionary representing your job "
+        "and use it with the new operator."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):
     """Start a PySpark Job on a Cloud DataProc cluster.

@@ -1815,15 +2057,6 @@ class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):
         dataproc_jars: list[str] | None = None,
         **kwargs,
     ) -> None:
-        # TODO: Remove one day
-        warnings.warn(
-            "The `{cls}` operator is deprecated, please use `DataprocSubmitJobOperator` instead. You can use"
-            " `generate_job` method of `{cls}` to generate dictionary representing your job"
-            " and use it with the new operator.".format(cls=type(self).__name__),
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
         super().__init__(
            impersonation_chain=impersonation_chain,
            region=region,
airflow/providers/google/cloud/operators/kubernetes_engine.py

@@ -22,6 +22,7 @@ import warnings
 from functools import cached_property
 from typing import TYPE_CHECKING, Any, Sequence

+from deprecated import deprecated
 from google.api_core.exceptions import AlreadyExists
 from google.cloud.container_v1.types import Cluster

@@ -510,13 +511,12 @@ class GKEStartPodOperator(KubernetesPodOperator):
             raise AirflowException("config_file is not an allowed parameter for the GKEStartPodOperator.")

     @staticmethod
+    @deprecated(
+        reason="Please use `fetch_cluster_info` instead to get the cluster info for connecting to it.",
+        category=AirflowProviderDeprecationWarning,
+    )
     def get_gke_config_file():
-        warnings.warn(
-            "The `get_gke_config_file` method is deprecated, "
-            "please use `fetch_cluster_info` instead to get the cluster info for connecting to it.",
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
+        pass

     @cached_property
     def cluster_hook(self) -> GKEHook:
airflow/providers/google/cloud/operators/life_sciences.py

@@ -18,9 +18,10 @@
 """Operators that interact with Google Cloud Life Sciences service."""
 from __future__ import annotations

-import warnings
 from typing import TYPE_CHECKING, Sequence

+from deprecated import deprecated
+
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.hooks.life_sciences import LifeSciencesHook
 from airflow.providers.google.cloud.links.life_sciences import LifeSciencesLink
@@ -30,6 +31,14 @@ if TYPE_CHECKING:
     from airflow.utils.context import Context


+@deprecated(
+    reason=(
+        "Consider using Google Cloud Batch Operators instead."
+        "The Life Sciences API (beta) will be discontinued "
+        "on July 8, 2025 in favor of Google Cloud Batch."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class LifeSciencesRunPipelineOperator(GoogleCloudBaseOperator):
     """
     Runs a Life Sciences Pipeline.
@@ -87,14 +96,6 @@ class LifeSciencesRunPipelineOperator(GoogleCloudBaseOperator):
         self._validate_inputs()
         self.impersonation_chain = impersonation_chain

-        warnings.warn(
-            """This operator is deprecated. Consider using Google Cloud Batch Operators instead.
-            The Life Sciences API (beta) will be discontinued on July 8, 2025 in favor
-            of Google Cloud Batch.""",
-            AirflowProviderDeprecationWarning,
-            stacklevel=3,
-        )
-
     def _validate_inputs(self) -> None:
         if not self.body:
             raise AirflowException("The required parameter 'body' is missing")
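
The deprecation points users at the Google Cloud Batch operators from the same provider. A rough, illustrative sketch of a submission via `CloudBatchSubmitJobOperator` (the job body below is a placeholder following the `google.cloud.batch_v1.Job` structure, not a drop-in translation of a Life Sciences pipeline):

from airflow.providers.google.cloud.operators.cloud_batch import CloudBatchSubmitJobOperator

# Placeholder Batch job: one task group running a single container runnable.
batch_job = {
    "task_groups": [
        {
            "task_spec": {
                "runnables": [
                    {"container": {"image_uri": "gcr.io/my-project/my-pipeline:latest"}}
                ]
            }
        }
    ]
}

run_pipeline = CloudBatchSubmitJobOperator(
    task_id="run_pipeline",
    project_id="my-project",
    region="us-central1",
    job_name="my-pipeline-job",
    job=batch_job,
)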