apache-airflow-providers-google 10.14.0rc2__py3-none-any.whl → 10.15.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/cloud/hooks/automl.py +13 -13
- airflow/providers/google/cloud/hooks/bigquery.py +193 -246
- airflow/providers/google/cloud/hooks/bigquery_dts.py +6 -6
- airflow/providers/google/cloud/hooks/bigtable.py +8 -8
- airflow/providers/google/cloud/hooks/cloud_batch.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_build.py +19 -20
- airflow/providers/google/cloud/hooks/cloud_composer.py +4 -4
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +10 -10
- airflow/providers/google/cloud/hooks/cloud_run.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_sql.py +17 -17
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +3 -3
- airflow/providers/google/cloud/hooks/compute.py +16 -16
- airflow/providers/google/cloud/hooks/compute_ssh.py +1 -1
- airflow/providers/google/cloud/hooks/datacatalog.py +22 -22
- airflow/providers/google/cloud/hooks/dataflow.py +48 -49
- airflow/providers/google/cloud/hooks/dataform.py +16 -16
- airflow/providers/google/cloud/hooks/datafusion.py +15 -15
- airflow/providers/google/cloud/hooks/datapipeline.py +3 -3
- airflow/providers/google/cloud/hooks/dataplex.py +19 -19
- airflow/providers/google/cloud/hooks/dataprep.py +8 -8
- airflow/providers/google/cloud/hooks/dataproc.py +88 -0
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +13 -13
- airflow/providers/google/cloud/hooks/datastore.py +3 -3
- airflow/providers/google/cloud/hooks/dlp.py +25 -25
- airflow/providers/google/cloud/hooks/gcs.py +25 -23
- airflow/providers/google/cloud/hooks/gdm.py +3 -3
- airflow/providers/google/cloud/hooks/kms.py +3 -3
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +63 -48
- airflow/providers/google/cloud/hooks/life_sciences.py +13 -12
- airflow/providers/google/cloud/hooks/looker.py +7 -7
- airflow/providers/google/cloud/hooks/mlengine.py +12 -12
- airflow/providers/google/cloud/hooks/natural_language.py +2 -2
- airflow/providers/google/cloud/hooks/os_login.py +1 -1
- airflow/providers/google/cloud/hooks/pubsub.py +9 -9
- airflow/providers/google/cloud/hooks/secret_manager.py +1 -1
- airflow/providers/google/cloud/hooks/spanner.py +11 -11
- airflow/providers/google/cloud/hooks/speech_to_text.py +1 -1
- airflow/providers/google/cloud/hooks/stackdriver.py +7 -7
- airflow/providers/google/cloud/hooks/tasks.py +11 -11
- airflow/providers/google/cloud/hooks/text_to_speech.py +1 -1
- airflow/providers/google/cloud/hooks/translate.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +13 -13
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +6 -6
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +45 -50
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +13 -13
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +9 -9
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +128 -11
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +10 -10
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +8 -8
- airflow/providers/google/cloud/hooks/video_intelligence.py +2 -2
- airflow/providers/google/cloud/hooks/vision.py +1 -1
- airflow/providers/google/cloud/hooks/workflows.py +10 -10
- airflow/providers/google/cloud/links/datafusion.py +12 -5
- airflow/providers/google/cloud/operators/bigquery.py +9 -11
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +3 -1
- airflow/providers/google/cloud/operators/dataflow.py +16 -16
- airflow/providers/google/cloud/operators/datafusion.py +9 -1
- airflow/providers/google/cloud/operators/dataproc.py +298 -65
- airflow/providers/google/cloud/operators/kubernetes_engine.py +6 -6
- airflow/providers/google/cloud/operators/life_sciences.py +10 -9
- airflow/providers/google/cloud/operators/mlengine.py +96 -96
- airflow/providers/google/cloud/operators/pubsub.py +2 -0
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +33 -3
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +59 -2
- airflow/providers/google/cloud/secrets/secret_manager.py +8 -7
- airflow/providers/google/cloud/sensors/bigquery.py +20 -16
- airflow/providers/google/cloud/sensors/cloud_composer.py +11 -8
- airflow/providers/google/cloud/sensors/gcs.py +8 -7
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +1 -1
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/mysql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/presto_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/trino_to_gcs.py +1 -1
- airflow/providers/google/cloud/triggers/bigquery.py +12 -12
- airflow/providers/google/cloud/triggers/bigquery_dts.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_batch.py +3 -1
- airflow/providers/google/cloud/triggers/cloud_build.py +2 -2
- airflow/providers/google/cloud/triggers/cloud_run.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +6 -6
- airflow/providers/google/cloud/triggers/dataflow.py +3 -1
- airflow/providers/google/cloud/triggers/datafusion.py +2 -2
- airflow/providers/google/cloud/triggers/dataplex.py +2 -2
- airflow/providers/google/cloud/triggers/dataproc.py +2 -2
- airflow/providers/google/cloud/triggers/gcs.py +12 -8
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/triggers/mlengine.py +2 -2
- airflow/providers/google/cloud/triggers/pubsub.py +1 -1
- airflow/providers/google/cloud/triggers/vertex_ai.py +99 -0
- airflow/providers/google/cloud/utils/bigquery.py +2 -2
- airflow/providers/google/cloud/utils/credentials_provider.py +2 -2
- airflow/providers/google/cloud/utils/dataform.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +2 -2
- airflow/providers/google/cloud/utils/helpers.py +2 -2
- airflow/providers/google/cloud/utils/mlengine_operator_utils.py +1 -1
- airflow/providers/google/cloud/utils/mlengine_prediction_summary.py +1 -1
- airflow/providers/google/common/auth_backend/google_openid.py +2 -2
- airflow/providers/google/common/hooks/base_google.py +29 -22
- airflow/providers/google/common/hooks/discovery_api.py +2 -2
- airflow/providers/google/common/utils/id_token_credentials.py +5 -5
- airflow/providers/google/firebase/hooks/firestore.py +3 -3
- airflow/providers/google/get_provider_info.py +7 -2
- airflow/providers/google/leveldb/hooks/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/analytics.py +11 -14
- airflow/providers/google/marketing_platform/hooks/campaign_manager.py +11 -11
- airflow/providers/google/marketing_platform/hooks/display_video.py +13 -13
- airflow/providers/google/marketing_platform/hooks/search_ads.py +4 -4
- airflow/providers/google/marketing_platform/operators/analytics.py +37 -32
- airflow/providers/google/suite/hooks/calendar.py +2 -2
- airflow/providers/google/suite/hooks/drive.py +7 -7
- airflow/providers/google/suite/hooks/sheets.py +8 -8
- {apache_airflow_providers_google-10.14.0rc2.dist-info → apache_airflow_providers_google-10.15.0rc1.dist-info}/METADATA +11 -11
- {apache_airflow_providers_google-10.14.0rc2.dist-info → apache_airflow_providers_google-10.15.0rc1.dist-info}/RECORD +121 -120
- {apache_airflow_providers_google-10.14.0rc2.dist-info → apache_airflow_providers_google-10.15.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.14.0rc2.dist-info → apache_airflow_providers_google-10.15.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/operators/dataproc.py

@@ -31,6 +31,7 @@ from datetime import datetime, timedelta
 from enum import Enum
 from typing import TYPE_CHECKING, Any, Sequence

+from deprecated import deprecated
 from google.api_core.exceptions import AlreadyExists, NotFound
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
 from google.api_core.retry import Retry, exponential_sleep_generator
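The `deprecated` import added above is used throughout this release to replace constructor-time `warnings.warn(...)` calls with a class-level decorator, so the warning fires at instantiation via the decorator machinery. A minimal sketch of the pattern; `SomeOldOperator` and `SomeNewOperator` are hypothetical names used only for illustration:

    from deprecated import deprecated

    from airflow.exceptions import AirflowProviderDeprecationWarning


    # Hypothetical operator, shown only to illustrate the decorator pattern
    # adopted across this diff.
    @deprecated(
        reason="Please use `SomeNewOperator` instead.",
        category=AirflowProviderDeprecationWarning,
    )
    class SomeOldOperator:
        """Instantiating this class emits an AirflowProviderDeprecationWarning."""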
@@ -158,12 +159,18 @@ class ClusterGenerator:
         Valid values: ``pd-ssd`` (Persistent Disk Solid State Drive) or
         ``pd-standard`` (Persistent Disk Hard Disk Drive).
     :param master_disk_size: Disk size for the primary node
+    :param master_accelerator_type: Type of the accelerator card (GPU) to attach to the primary node,
+        see https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
+    :param master_accelerator_count: Number of accelerator cards (GPUs) to attach to the primary node
     :param worker_machine_type: Compute engine machine type to use for the worker nodes
     :param worker_disk_type: Type of the boot disk for the worker node
         (default is ``pd-standard``).
         Valid values: ``pd-ssd`` (Persistent Disk Solid State Drive) or
         ``pd-standard`` (Persistent Disk Hard Disk Drive).
     :param worker_disk_size: Disk size for the worker nodes
+    :param worker_accelerator_type: Type of the accelerator card (GPU) to attach to the worker nodes,
+        see https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
+    :param worker_accelerator_count: Number of accelerator cards (GPUs) to attach to the worker nodes
     :param num_preemptible_workers: The # of VM instances in the instance group as secondary workers
         inside the cluster with Preemptibility enabled by default.
         Note, that it is not possible to mix non-preemptible and preemptible secondary workers in
@@ -200,6 +207,9 @@ class ClusterGenerator:
         identify the driver group in future operations, such as resizing the node group.
     :param secondary_worker_instance_flexibility_policy: Instance flexibility Policy allowing a mixture of VM
         shapes and provisioning models.
+    :param secondary_worker_accelerator_type: Type of the accelerator card (GPU) to attach to the secondary workers,
+        see https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
+    :param secondary_worker_accelerator_count: Number of accelerator cards (GPUs) to attach to the secondary workers
     """

     def __init__(
@@ -227,9 +237,13 @@ class ClusterGenerator:
         master_machine_type: str = "n1-standard-4",
         master_disk_type: str = "pd-standard",
         master_disk_size: int = 1024,
+        master_accelerator_type: str | None = None,
+        master_accelerator_count: int | None = None,
         worker_machine_type: str = "n1-standard-4",
         worker_disk_type: str = "pd-standard",
         worker_disk_size: int = 1024,
+        worker_accelerator_type: str | None = None,
+        worker_accelerator_count: int | None = None,
         num_preemptible_workers: int = 0,
         preemptibility: str = PreemptibilityType.PREEMPTIBLE.value,
         service_account: str | None = None,
@@ -242,6 +256,8 @@ class ClusterGenerator:
         driver_pool_size: int = 0,
         driver_pool_id: str | None = None,
         secondary_worker_instance_flexibility_policy: InstanceFlexibilityPolicy | None = None,
+        secondary_worker_accelerator_type: str | None = None,
+        secondary_worker_accelerator_count: int | None = None,
         **kwargs,
     ) -> None:
         self.project_id = project_id
@@ -263,10 +279,14 @@ class ClusterGenerator:
         self.master_machine_type = master_machine_type
         self.master_disk_type = master_disk_type
         self.master_disk_size = master_disk_size
+        self.master_accelerator_type = master_accelerator_type
+        self.master_accelerator_count = master_accelerator_count
         self.autoscaling_policy = autoscaling_policy
         self.worker_machine_type = worker_machine_type
         self.worker_disk_type = worker_disk_type
         self.worker_disk_size = worker_disk_size
+        self.worker_accelerator_type = worker_accelerator_type
+        self.worker_accelerator_count = worker_accelerator_count
         self.zone = zone
         self.network_uri = network_uri
         self.subnetwork_uri = subnetwork_uri
@@ -283,6 +303,8 @@ class ClusterGenerator:
         self.driver_pool_size = driver_pool_size
         self.driver_pool_id = driver_pool_id
         self.secondary_worker_instance_flexibility_policy = secondary_worker_instance_flexibility_policy
+        self.secondary_worker_accelerator_type = secondary_worker_accelerator_type
+        self.secondary_worker_accelerator_count = secondary_worker_accelerator_count

         if self.custom_image and self.image_version:
             raise ValueError("The custom_image and image_version can't be both set")
@@ -339,10 +361,10 @@ class ClusterGenerator:
         if self.subnetwork_uri:
             cluster_data[config]["subnetwork_uri"] = self.subnetwork_uri

-        if self.internal_ip_only:
-            if not self.subnetwork_uri:
+        if self.internal_ip_only is not None:
+            if not self.subnetwork_uri and self.internal_ip_only:
                 raise AirflowException("Set internal_ip_only to true only when you pass a subnetwork_uri.")
-            cluster_data[config]["internal_ip_only"] = True
+            cluster_data[config]["internal_ip_only"] = self.internal_ip_only

         if self.tags:
             cluster_data[config]["tags"] = self.tags
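With this change `internal_ip_only` becomes effectively tri-state: leaving it as `None` omits the field from the generated config, an explicit `True`/`False` is passed through, and `True` still requires a `subnetwork_uri`. A rough sketch of the resulting behavior, with illustrative project and subnetwork values (the config is built when `make()` is called):

    from airflow.providers.google.cloud.operators.dataproc import ClusterGenerator

    # Default: the "internal_ip_only" key is simply omitted from the GCE cluster config.
    config = ClusterGenerator(project_id="my-project").make()

    # An explicit True requires a subnetwork_uri; without one, make() raises AirflowException.
    config = ClusterGenerator(
        project_id="my-project",
        subnetwork_uri="projects/my-project/regions/us-central1/subnetworks/default",
        internal_ip_only=True,
    ).make()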
@@ -423,6 +445,18 @@ class ClusterGenerator:
         if self.min_num_workers:
             cluster_data["worker_config"]["min_num_instances"] = self.min_num_workers

+        if self.master_accelerator_type:
+            cluster_data["master_config"]["accelerators"] = {
+                "accelerator_type_uri": self.master_accelerator_type,
+                "accelerator_count": self.master_accelerator_count,
+            }
+
+        if self.worker_accelerator_type:
+            cluster_data["worker_config"]["accelerators"] = {
+                "accelerator_type_uri": self.worker_accelerator_type,
+                "accelerator_count": self.worker_accelerator_count,
+            }
+
         if self.num_preemptible_workers > 0:
             cluster_data["secondary_worker_config"] = {
                 "num_instances": self.num_preemptible_workers,
@@ -434,6 +468,11 @@ class ClusterGenerator:
                 "is_preemptible": True,
                 "preemptibility": self.preemptibility.value,
             }
+            if self.worker_accelerator_type:
+                cluster_data["secondary_worker_config"]["accelerators"] = {
+                    "accelerator_type_uri": self.secondary_worker_accelerator_type,
+                    "accelerator_count": self.secondary_worker_accelerator_count,
+                }
             if self.secondary_worker_instance_flexibility_policy:
                 cluster_data["secondary_worker_config"]["instance_flexibility_policy"] = {
                     "instance_selection_list": [
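Taken together, the three `*_accelerator_type`/`*_accelerator_count` pairs let a generated cluster attach GPUs per node group. A minimal sketch, with illustrative project, zone, and accelerator values:

    from airflow.providers.google.cloud.operators.dataproc import ClusterGenerator

    cluster_config = ClusterGenerator(
        project_id="my-project",
        zone="us-central1-a",
        num_workers=2,
        master_machine_type="n1-standard-4",
        master_accelerator_type="nvidia-tesla-t4",  # illustrative accelerator type
        master_accelerator_count=1,
        worker_machine_type="n1-standard-4",
        worker_accelerator_type="nvidia-tesla-t4",
        worker_accelerator_count=1,
    ).make()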
@@ -724,6 +763,17 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
         cluster = self._get_cluster(hook)
         return cluster

+    def _start_cluster(self, hook: DataprocHook):
+        op: operation.Operation = hook.start_cluster(
+            region=self.region,
+            project_id=self.project_id,
+            cluster_name=self.cluster_name,
+            retry=self.retry,
+            timeout=self.timeout,
+            metadata=self.metadata,
+        )
+        return hook.wait_for_operation(timeout=self.timeout, result_retry=self.retry, operation=op)
+
     def execute(self, context: Context) -> dict:
         self.log.info("Creating cluster: %s", self.cluster_name)
         hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
@@ -801,6 +851,9 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
             # Create new cluster
            cluster = self._create_cluster(hook)
            self._handle_error_state(hook, cluster)
+        elif cluster.status.state == cluster.status.State.STOPPED:
+            # If the cluster exists and is already stopped, start it
+            self._start_cluster(hook)

        return Cluster.to_dict(cluster)
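A consequence of the new `elif` branch: re-running `DataprocCreateClusterOperator` against a cluster that exists but is STOPPED now starts it rather than failing. A sketch of the affected call, with illustrative identifiers and the generated config from above:

    from airflow.providers.google.cloud.operators.dataproc import DataprocCreateClusterOperator

    create_cluster = DataprocCreateClusterOperator(
        task_id="create_cluster",
        project_id="my-project",
        region="us-central1",
        cluster_name="my-cluster",
        cluster_config=cluster_config,  # e.g. ClusterGenerator output
        use_if_exists=True,  # reuse an existing cluster; a STOPPED one is now started
    )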
@@ -820,6 +873,11 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
             return event["cluster"]


+# TODO: Remove one day
+@deprecated(
+    reason="Please use `DataprocUpdateClusterOperator` instead.",
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
     """Scale, up or down, a cluster on Google Cloud Dataproc.
@@ -888,14 +946,6 @@ class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain

-        # TODO: Remove one day
-        warnings.warn(
-            f"The `{type(self).__name__}` operator is deprecated, "
-            "please use `DataprocUpdateClusterOperator` instead.",
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
     def _build_scale_cluster_data(self) -> dict:
         scale_data = {
             "config": {
@@ -1082,6 +1132,189 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
         )


+class _DataprocStartStopClusterBaseOperator(GoogleCloudBaseOperator):
+    """Base class to start or stop a cluster in a project.
+
+    :param cluster_name: Required. Name of the cluster to be started or stopped.
+    :param region: Required. The specified region where the dataproc cluster is created.
+    :param project_id: Optional. The ID of the Google Cloud project the cluster belongs to.
+    :param cluster_uuid: Optional. Specifying the ``cluster_uuid`` means the RPC should fail
+        if cluster with specified UUID does not exist.
+    :param request_id: Optional. A unique id used to identify the request. If the server receives two
+        ``DeleteClusterRequest`` requests with the same id, then the second request will be ignored and the
+        first ``google.longrunning.Operation`` created and stored in the backend is returned.
+    :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be
+        retried.
+    :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if
+        ``retry`` is specified, the timeout applies to each individual attempt.
+    :param metadata: Additional metadata that is provided to the method.
+    :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
+    :param impersonation_chain: Optional service account to impersonate using short-term
+        credentials, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request.
+        If set as a string, the account must grant the originating account
+        the Service Account Token Creator IAM role.
+        If set as a sequence, the identities from the list must grant
+        Service Account Token Creator IAM role to the directly preceding identity, with first
+        account from the list granting this role to the originating account (templated).
+    """
+
+    template_fields = (
+        "cluster_name",
+        "region",
+        "project_id",
+        "request_id",
+        "impersonation_chain",
+    )
+
+    def __init__(
+        self,
+        *,
+        cluster_name: str,
+        region: str,
+        project_id: str | None = None,
+        cluster_uuid: str | None = None,
+        request_id: str | None = None,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
+        timeout: float = 1 * 60 * 60,
+        metadata: Sequence[tuple[str, str]] = (),
+        gcp_conn_id: str = "google_cloud_default",
+        impersonation_chain: str | Sequence[str] | None = None,
+        **kwargs,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.project_id = project_id
+        self.region = region
+        self.cluster_name = cluster_name
+        self.cluster_uuid = cluster_uuid
+        self.request_id = request_id
+        self.retry = retry
+        self.timeout = timeout
+        self.metadata = metadata
+        self.gcp_conn_id = gcp_conn_id
+        self.impersonation_chain = impersonation_chain
+        self._hook: DataprocHook | None = None
+
+    @property
+    def hook(self):
+        if self._hook is None:
+            self._hook = DataprocHook(
+                gcp_conn_id=self.gcp_conn_id,
+                impersonation_chain=self.impersonation_chain,
+            )
+        return self._hook
+
+    def _get_project_id(self) -> str:
+        return self.project_id or self.hook.project_id
+
+    def _get_cluster(self) -> Cluster:
+        """Retrieve the cluster information.
+
+        :return: Instance of ``google.cloud.dataproc_v1.Cluster`` class
+        """
+        return self.hook.get_cluster(
+            project_id=self._get_project_id(),
+            region=self.region,
+            cluster_name=self.cluster_name,
+            retry=self.retry,
+            timeout=self.timeout,
+            metadata=self.metadata,
+        )
+
+    def _check_desired_cluster_state(self, cluster: Cluster) -> tuple[bool, str | None]:
+        """Implement this method in child class to return whether the cluster is in desired state or not.
+
+        If the cluster is in desired state you can return a log message content as a second value
+        for the return tuple.
+
+        :param cluster: Required. Instance of ``google.cloud.dataproc_v1.Cluster``
+            class to interact with Dataproc API
+        :return: Tuple of (Boolean, Optional[str]) The first value of the tuple is whether the cluster is
+            in desired state or not. The second value of the tuple will be used if you want to log something
+            when the cluster is in desired state already.
+        """
+        raise NotImplementedError
+
+    def _get_operation(self) -> operation.Operation:
+        """Implement this method in child class to call the related hook method and return its result.
+
+        :return: ``google.api_core.operation.Operation`` result of the hook call that moves the cluster
+            toward the desired state
+        """
+        raise NotImplementedError
+
+    def execute(self, context: Context) -> dict | None:
+        cluster: Cluster = self._get_cluster()
+        is_already_desired_state, log_str = self._check_desired_cluster_state(cluster)
+        if is_already_desired_state:
+            self.log.info(log_str)
+            return None
+
+        op: operation.Operation = self._get_operation()
+        result = self.hook.wait_for_operation(timeout=self.timeout, result_retry=self.retry, operation=op)
+        return Cluster.to_dict(result)
+
+
+class DataprocStartClusterOperator(_DataprocStartStopClusterBaseOperator):
+    """Start a cluster in a project."""
+
+    operator_extra_links = (DataprocClusterLink(),)
+
+    def execute(self, context: Context) -> dict | None:
+        self.log.info("Starting the cluster: %s", self.cluster_name)
+        cluster = super().execute(context)
+        DataprocClusterLink.persist(
+            context=context,
+            operator=self,
+            cluster_id=self.cluster_name,
+            project_id=self._get_project_id(),
+            region=self.region,
+        )
+        self.log.info("Cluster started")
+        return cluster
+
+    def _check_desired_cluster_state(self, cluster: Cluster) -> tuple[bool, str | None]:
+        if cluster.status.state == cluster.status.State.RUNNING:
+            return True, f'The cluster "{self.cluster_name}" is already running!'
+        return False, None
+
+    def _get_operation(self) -> operation.Operation:
+        return self.hook.start_cluster(
+            region=self.region,
+            project_id=self._get_project_id(),
+            cluster_name=self.cluster_name,
+            cluster_uuid=self.cluster_uuid,
+            retry=self.retry,
+            timeout=self.timeout,
+            metadata=self.metadata,
+        )
+
+
+class DataprocStopClusterOperator(_DataprocStartStopClusterBaseOperator):
+    """Stop a cluster in a project."""
+
+    def execute(self, context: Context) -> dict | None:
+        self.log.info("Stopping the cluster: %s", self.cluster_name)
+        cluster = super().execute(context)
+        self.log.info("Cluster stopped")
+        return cluster
+
+    def _check_desired_cluster_state(self, cluster: Cluster) -> tuple[bool, str | None]:
+        if cluster.status.state in [cluster.status.State.STOPPED, cluster.status.State.STOPPING]:
+            return True, f'The cluster "{self.cluster_name}" is already stopped!'
+        return False, None
+
+    def _get_operation(self) -> operation.Operation:
+        return self.hook.stop_cluster(
+            region=self.region,
+            project_id=self._get_project_id(),
+            cluster_name=self.cluster_name,
+            cluster_uuid=self.cluster_uuid,
+            retry=self.retry,
+            timeout=self.timeout,
+            metadata=self.metadata,
+        )
+
+
 class DataprocJobBaseOperator(GoogleCloudBaseOperator):
     """Base class for operators that launch job on DataProc.
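The two public subclasses above are the user-facing API. A minimal DAG sketch, with illustrative identifiers, bracketing work on an existing cluster:

    from airflow.providers.google.cloud.operators.dataproc import (
        DataprocStartClusterOperator,
        DataprocStopClusterOperator,
    )

    start_cluster = DataprocStartClusterOperator(
        task_id="start_cluster",
        project_id="my-project",
        region="us-central1",
        cluster_name="my-cluster",
    )

    stop_cluster = DataprocStopClusterOperator(
        task_id="stop_cluster",
        project_id="my-project",
        region="us-central1",
        cluster_name="my-cluster",
    )

    # In a real DAG, job tasks would sit between these two.
    start_cluster >> stop_cluster

Both operators are no-ops (with a log line) when the cluster is already in the requested state, per `_check_desired_cluster_state` above.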
@@ -1256,6 +1489,15 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
         self.hook.cancel_job(project_id=self.project_id, job_id=self.dataproc_job_id, region=self.region)


+# TODO: Remove one day
+@deprecated(
+    reason=(
+        "Please use `DataprocSubmitJobOperator` instead. "
+        "You can use the `generate_job` method to generate a dictionary representing your job "
+        "and use it with the new operator."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
     """Start a Pig query Job on a Cloud DataProc cluster.

@@ -1330,15 +1572,6 @@ class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
         dataproc_jars: list[str] | None = None,
         **kwargs,
     ) -> None:
-        # TODO: Remove one day
-        warnings.warn(
-            "The `{cls}` operator is deprecated, please use `DataprocSubmitJobOperator` instead. You can use"
-            " `generate_job` method of `{cls}` to generate dictionary representing your job"
-            " and use it with the new operator.".format(cls=type(self).__name__),
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
         super().__init__(
             impersonation_chain=impersonation_chain,
             region=region,

@@ -1382,6 +1615,15 @@ class DataprocSubmitPigJobOperator(DataprocJobBaseOperator):
         super().execute(context)


+# TODO: Remove one day
+@deprecated(
+    reason=(
+        "Please use `DataprocSubmitJobOperator` instead. "
+        "You can use the `generate_job` method to generate a dictionary representing your job "
+        "and use it with the new operator."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator):
     """Start a Hive query Job on a Cloud DataProc cluster.

@@ -1422,15 +1664,6 @@ class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator):
         dataproc_jars: list[str] | None = None,
         **kwargs,
     ) -> None:
-        # TODO: Remove one day
-        warnings.warn(
-            "The `{cls}` operator is deprecated, please use `DataprocSubmitJobOperator` instead. You can use"
-            " `generate_job` method of `{cls}` to generate dictionary representing your job"
-            " and use it with the new operator.".format(cls=type(self).__name__),
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
         super().__init__(
             impersonation_chain=impersonation_chain,
             region=region,

@@ -1474,6 +1707,15 @@ class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator):
         super().execute(context)


+# TODO: Remove one day
+@deprecated(
+    reason=(
+        "Please use `DataprocSubmitJobOperator` instead. "
+        "You can use the `generate_job` method to generate a dictionary representing your job "
+        "and use it with the new operator."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator):
     """Start a Spark SQL query Job on a Cloud DataProc cluster.

@@ -1515,15 +1757,6 @@ class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator):
         dataproc_jars: list[str] | None = None,
         **kwargs,
     ) -> None:
-        # TODO: Remove one day
-        warnings.warn(
-            "The `{cls}` operator is deprecated, please use `DataprocSubmitJobOperator` instead. You can use"
-            " `generate_job` method of `{cls}` to generate dictionary representing your job"
-            " and use it with the new operator.".format(cls=type(self).__name__),
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
         super().__init__(
             impersonation_chain=impersonation_chain,
             region=region,

@@ -1565,6 +1798,15 @@ class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator):
         super().execute(context)


+# TODO: Remove one day
+@deprecated(
+    reason=(
+        "Please use `DataprocSubmitJobOperator` instead. "
+        "You can use the `generate_job` method to generate a dictionary representing your job "
+        "and use it with the new operator."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator):
     """Start a Spark Job on a Cloud DataProc cluster.

@@ -1610,15 +1852,6 @@ class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator):
         dataproc_jars: list[str] | None = None,
         **kwargs,
     ) -> None:
-        # TODO: Remove one day
-        warnings.warn(
-            "The `{cls}` operator is deprecated, please use `DataprocSubmitJobOperator` instead. You can use"
-            " `generate_job` method of `{cls}` to generate dictionary representing your job"
-            " and use it with the new operator.".format(cls=type(self).__name__),
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
         super().__init__(
             impersonation_chain=impersonation_chain,
             region=region,

@@ -1656,6 +1889,15 @@ class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator):
         super().execute(context)


+# TODO: Remove one day
+@deprecated(
+    reason=(
+        "Please use `DataprocSubmitJobOperator` instead. "
+        "You can use the `generate_job` method to generate a dictionary representing your job "
+        "and use it with the new operator."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
     """Start a Hadoop Job on a Cloud DataProc cluster.

@@ -1701,15 +1943,6 @@ class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
         dataproc_jars: list[str] | None = None,
         **kwargs,
     ) -> None:
-        # TODO: Remove one day
-        warnings.warn(
-            "The `{cls}` operator is deprecated, please use `DataprocSubmitJobOperator` instead. You can use"
-            " `generate_job` method of `{cls}` to generate dictionary representing your job"
-            " and use it with the new operator.".format(cls=type(self).__name__),
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
         super().__init__(
             impersonation_chain=impersonation_chain,
             region=region,

@@ -1746,6 +1979,15 @@ class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator):
         super().execute(context)


+# TODO: Remove one day
+@deprecated(
+    reason=(
+        "Please use `DataprocSubmitJobOperator` instead. "
+        "You can use the `generate_job` method to generate a dictionary representing your job "
+        "and use it with the new operator."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):
     """Start a PySpark Job on a Cloud DataProc cluster.

@@ -1815,15 +2057,6 @@ class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator):
         dataproc_jars: list[str] | None = None,
         **kwargs,
     ) -> None:
-        # TODO: Remove one day
-        warnings.warn(
-            "The `{cls}` operator is deprecated, please use `DataprocSubmitJobOperator` instead. You can use"
-            " `generate_job` method of `{cls}` to generate dictionary representing your job"
-            " and use it with the new operator.".format(cls=type(self).__name__),
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
-
         super().__init__(
             impersonation_chain=impersonation_chain,
             region=region,
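Each of the six job-specific submit operators now advertises the same migration path: build the job dict with the operator's `generate_job()` helper and pass it to `DataprocSubmitJobOperator`. A rough sketch, assuming the deprecated operator's kwargs shown here are accepted as in earlier releases (identifiers and query are illustrative):

    from airflow.providers.google.cloud.operators.dataproc import (
        DataprocSubmitJobOperator,
        DataprocSubmitPigJobOperator,
    )

    # Deprecated operator used only to produce the job dictionary.
    pig_job = DataprocSubmitPigJobOperator(
        task_id="build_pig_job",
        region="us-central1",
        cluster_name="my-cluster",
        query="ls = LOAD '/data' AS (line);",
    ).generate_job()

    submit_pig_job = DataprocSubmitJobOperator(
        task_id="submit_pig_job",
        project_id="my-project",
        region="us-central1",
        job=pig_job,
    )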
airflow/providers/google/cloud/operators/kubernetes_engine.py

@@ -22,6 +22,7 @@ import warnings
 from functools import cached_property
 from typing import TYPE_CHECKING, Any, Sequence

+from deprecated import deprecated
 from google.api_core.exceptions import AlreadyExists
 from google.cloud.container_v1.types import Cluster
@@ -510,13 +511,12 @@ class GKEStartPodOperator(KubernetesPodOperator):
             raise AirflowException("config_file is not an allowed parameter for the GKEStartPodOperator.")

     @staticmethod
+    @deprecated(
+        reason="Please use `fetch_cluster_info` instead to get the cluster info for connecting to it.",
+        category=AirflowProviderDeprecationWarning,
+    )
     def get_gke_config_file():
-        warnings.warn(
-            "The `get_gke_config_file` method is deprecated, "
-            "please use `fetch_cluster_info` instead to get the cluster info for connecting to it.",
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
+        pass

     @cached_property
     def cluster_hook(self) -> GKEHook:
airflow/providers/google/cloud/operators/life_sciences.py

@@ -18,9 +18,10 @@
 """Operators that interact with Google Cloud Life Sciences service."""
 from __future__ import annotations

-import warnings
 from typing import TYPE_CHECKING, Sequence

+from deprecated import deprecated
+
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.cloud.hooks.life_sciences import LifeSciencesHook
 from airflow.providers.google.cloud.links.life_sciences import LifeSciencesLink
@@ -30,6 +31,14 @@ if TYPE_CHECKING:
     from airflow.utils.context import Context


+@deprecated(
+    reason=(
+        "Consider using Google Cloud Batch Operators instead. "
+        "The Life Sciences API (beta) will be discontinued "
+        "on July 8, 2025 in favor of Google Cloud Batch."
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class LifeSciencesRunPipelineOperator(GoogleCloudBaseOperator):
     """
     Runs a Life Sciences Pipeline.
@@ -87,14 +96,6 @@ class LifeSciencesRunPipelineOperator(GoogleCloudBaseOperator):
         self._validate_inputs()
         self.impersonation_chain = impersonation_chain

-        warnings.warn(
-            """This operator is deprecated. Consider using Google Cloud Batch Operators instead.
-            The Life Sciences API (beta) will be discontinued on July 8, 2025 in favor
-            of Google Cloud Batch.""",
-            AirflowProviderDeprecationWarning,
-            stacklevel=3,
-        )
-
     def _validate_inputs(self) -> None:
         if not self.body:
             raise AirflowException("The required parameter 'body' is missing")