apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/ads/hooks/ads.py +39 -6
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/alloy_db.py +1 -1
- airflow/providers/google/cloud/hooks/bigquery.py +176 -293
- airflow/providers/google/cloud/hooks/cloud_batch.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_build.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_composer.py +288 -15
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +1 -1
- airflow/providers/google/cloud/hooks/cloud_run.py +18 -10
- airflow/providers/google/cloud/hooks/cloud_sql.py +102 -23
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +29 -7
- airflow/providers/google/cloud/hooks/compute.py +1 -1
- airflow/providers/google/cloud/hooks/compute_ssh.py +6 -2
- airflow/providers/google/cloud/hooks/datacatalog.py +10 -1
- airflow/providers/google/cloud/hooks/dataflow.py +72 -95
- airflow/providers/google/cloud/hooks/dataform.py +1 -1
- airflow/providers/google/cloud/hooks/datafusion.py +21 -19
- airflow/providers/google/cloud/hooks/dataplex.py +2 -2
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +73 -72
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +1 -1
- airflow/providers/google/cloud/hooks/dlp.py +1 -1
- airflow/providers/google/cloud/hooks/functions.py +1 -1
- airflow/providers/google/cloud/hooks/gcs.py +112 -15
- airflow/providers/google/cloud/hooks/gdm.py +1 -1
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +3 -3
- airflow/providers/google/cloud/hooks/looker.py +6 -2
- airflow/providers/google/cloud/hooks/managed_kafka.py +1 -1
- airflow/providers/google/cloud/hooks/mlengine.py +4 -3
- airflow/providers/google/cloud/hooks/pubsub.py +3 -0
- airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
- airflow/providers/google/cloud/hooks/spanner.py +74 -9
- airflow/providers/google/cloud/hooks/stackdriver.py +11 -9
- airflow/providers/google/cloud/hooks/tasks.py +1 -1
- airflow/providers/google/cloud/hooks/translate.py +2 -2
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +2 -210
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +3 -3
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +28 -2
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +308 -8
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/vision.py +3 -3
- airflow/providers/google/cloud/hooks/workflows.py +1 -1
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -13
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -96
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -95
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
- airflow/providers/google/cloud/links/managed_kafka.py +0 -70
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +58 -22
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +10 -8
- airflow/providers/google/cloud/openlineage/utils.py +15 -1
- airflow/providers/google/cloud/operators/alloy_db.py +71 -56
- airflow/providers/google/cloud/operators/bigquery.py +73 -636
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -6
- airflow/providers/google/cloud/operators/bigtable.py +37 -8
- airflow/providers/google/cloud/operators/cloud_base.py +21 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +3 -3
- airflow/providers/google/cloud/operators/cloud_build.py +76 -33
- airflow/providers/google/cloud/operators/cloud_composer.py +129 -41
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
- airflow/providers/google/cloud/operators/cloud_run.py +24 -6
- airflow/providers/google/cloud/operators/cloud_sql.py +8 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +93 -12
- airflow/providers/google/cloud/operators/compute.py +9 -41
- airflow/providers/google/cloud/operators/datacatalog.py +157 -21
- airflow/providers/google/cloud/operators/dataflow.py +40 -16
- airflow/providers/google/cloud/operators/dataform.py +15 -5
- airflow/providers/google/cloud/operators/datafusion.py +42 -21
- airflow/providers/google/cloud/operators/dataplex.py +194 -110
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +80 -36
- airflow/providers/google/cloud/operators/dataproc_metastore.py +97 -89
- airflow/providers/google/cloud/operators/datastore.py +23 -7
- airflow/providers/google/cloud/operators/dlp.py +6 -29
- airflow/providers/google/cloud/operators/functions.py +17 -8
- airflow/providers/google/cloud/operators/gcs.py +12 -9
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +62 -100
- airflow/providers/google/cloud/operators/looker.py +2 -2
- airflow/providers/google/cloud/operators/managed_kafka.py +108 -53
- airflow/providers/google/cloud/operators/natural_language.py +1 -1
- airflow/providers/google/cloud/operators/pubsub.py +68 -15
- airflow/providers/google/cloud/operators/spanner.py +26 -13
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -3
- airflow/providers/google/cloud/operators/stackdriver.py +1 -9
- airflow/providers/google/cloud/operators/tasks.py +1 -12
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -3
- airflow/providers/google/cloud/operators/translate.py +41 -17
- airflow/providers/google/cloud/operators/translate_speech.py +2 -3
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +30 -10
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +55 -27
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -115
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +12 -10
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +31 -8
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
- airflow/providers/google/cloud/operators/vision.py +2 -2
- airflow/providers/google/cloud/operators/workflows.py +18 -15
- airflow/providers/google/cloud/secrets/secret_manager.py +3 -2
- airflow/providers/google/cloud/sensors/bigquery.py +3 -3
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -3
- airflow/providers/google/cloud/sensors/bigtable.py +11 -4
- airflow/providers/google/cloud/sensors/cloud_composer.py +533 -30
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -3
- airflow/providers/google/cloud/sensors/dataflow.py +26 -10
- airflow/providers/google/cloud/sensors/dataform.py +2 -3
- airflow/providers/google/cloud/sensors/datafusion.py +4 -5
- airflow/providers/google/cloud/sensors/dataplex.py +2 -3
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +2 -3
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -3
- airflow/providers/google/cloud/sensors/gcs.py +4 -5
- airflow/providers/google/cloud/sensors/looker.py +2 -3
- airflow/providers/google/cloud/sensors/pubsub.py +4 -5
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -3
- airflow/providers/google/cloud/sensors/workflows.py +2 -3
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +4 -3
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +10 -5
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +21 -13
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +4 -3
- airflow/providers/google/cloud/transfers/gcs_to_local.py +6 -4
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +11 -5
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +13 -7
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +14 -5
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +76 -35
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
- airflow/providers/google/cloud/triggers/cloud_run.py +3 -3
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +92 -2
- airflow/providers/google/cloud/triggers/dataflow.py +122 -0
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +14 -2
- airflow/providers/google/cloud/triggers/dataproc.py +123 -53
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +47 -28
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +15 -19
- airflow/providers/google/cloud/triggers/vertex_ai.py +1 -1
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +2 -2
- airflow/providers/google/cloud/utils/field_sanitizer.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +2 -3
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +27 -9
- airflow/providers/google/common/hooks/operation_helpers.py +1 -1
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +3 -4
- airflow/providers/google/firebase/hooks/firestore.py +1 -1
- airflow/providers/google/firebase/operators/firestore.py +3 -3
- airflow/providers/google/get_provider_info.py +56 -52
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +27 -2
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/campaign_manager.py +1 -1
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +2 -3
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +6 -6
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +3 -64
- airflow/providers/google/suite/hooks/calendar.py +2 -2
- airflow/providers/google/suite/hooks/sheets.py +16 -2
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +3 -3
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/METADATA +90 -46
- apache_airflow_providers_google-19.3.0.dist-info/RECORD +331 -0
- apache_airflow_providers_google-19.3.0.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/hooks/automl.py +0 -673
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1362
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -112
- apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.3.0.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.3.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -36,7 +36,8 @@ from google.api_core.retry import Retry, exponential_sleep_generator
|
|
|
36
36
|
from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
|
|
37
37
|
|
|
38
38
|
from airflow.configuration import conf
|
|
39
|
-
from airflow.exceptions import
|
|
39
|
+
from airflow.exceptions import AirflowProviderDeprecationWarning
|
|
40
|
+
from airflow.providers.common.compat.sdk import AirflowException
|
|
40
41
|
from airflow.providers.google.cloud.hooks.dataproc import (
|
|
41
42
|
DataprocHook,
|
|
42
43
|
DataProcJobBuilder,
|
|
@@ -72,7 +73,7 @@ if TYPE_CHECKING:
|
|
|
72
73
|
from google.protobuf.field_mask_pb2 import FieldMask
|
|
73
74
|
from google.type.interval_pb2 import Interval
|
|
74
75
|
|
|
75
|
-
from airflow.
|
|
76
|
+
from airflow.providers.common.compat.sdk import Context
|
|
76
77
|
|
|
77
78
|
|
|
78
79
|
class PreemptibilityType(Enum):
|
|
@@ -213,6 +214,7 @@ class ClusterGenerator:
|
|
|
213
214
|
:param secondary_worker_accelerator_type: Type of the accelerator card (GPU) to attach to the secondary workers,
|
|
214
215
|
see https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
|
|
215
216
|
:param secondary_worker_accelerator_count: Number of accelerator cards (GPUs) to attach to the secondary workers
|
|
217
|
+
:param cluster_tier: The tier of the cluster (e.g. "CLUSTER_TIER_STANDARD" / "CLUSTER_TIER_PREMIUM").
|
|
216
218
|
"""
|
|
217
219
|
|
|
218
220
|
def __init__(
|
|
@@ -261,6 +263,8 @@ class ClusterGenerator:
|
|
|
261
263
|
secondary_worker_instance_flexibility_policy: InstanceFlexibilityPolicy | None = None,
|
|
262
264
|
secondary_worker_accelerator_type: str | None = None,
|
|
263
265
|
secondary_worker_accelerator_count: int | None = None,
|
|
266
|
+
*,
|
|
267
|
+
cluster_tier: str | None = None,
|
|
264
268
|
**kwargs,
|
|
265
269
|
) -> None:
|
|
266
270
|
self.project_id = project_id
|
|
@@ -308,6 +312,7 @@ class ClusterGenerator:
|
|
|
308
312
|
self.secondary_worker_instance_flexibility_policy = secondary_worker_instance_flexibility_policy
|
|
309
313
|
self.secondary_worker_accelerator_type = secondary_worker_accelerator_type
|
|
310
314
|
self.secondary_worker_accelerator_count = secondary_worker_accelerator_count
|
|
315
|
+
self.cluster_tier = cluster_tier
|
|
311
316
|
|
|
312
317
|
if self.custom_image and self.image_version:
|
|
313
318
|
raise ValueError("The custom_image and image_version can't be both set")
|
|
@@ -513,6 +518,9 @@ class ClusterGenerator:
|
|
|
513
518
|
if self.driver_pool_size > 0:
|
|
514
519
|
cluster_data["auxiliary_node_groups"] = [self._build_driver_pool()]
|
|
515
520
|
|
|
521
|
+
if self.cluster_tier:
|
|
522
|
+
cluster_data["cluster_tier"] = self.cluster_tier
|
|
523
|
+
|
|
516
524
|
cluster_data = self._build_gce_cluster_config(cluster_data)
|
|
517
525
|
|
|
518
526
|
if self.single_node:
|
|
@@ -621,6 +629,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
|
|
|
621
629
|
"virtual_cluster_config",
|
|
622
630
|
"cluster_name",
|
|
623
631
|
"labels",
|
|
632
|
+
"gcp_conn_id",
|
|
624
633
|
"impersonation_chain",
|
|
625
634
|
)
|
|
626
635
|
template_fields_renderers = {"cluster_config": "json", "virtual_cluster_config": "json"}
|
|
@@ -807,7 +816,6 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
|
|
|
807
816
|
if project_id:
|
|
808
817
|
DataprocClusterLink.persist(
|
|
809
818
|
context=context,
|
|
810
|
-
operator=self,
|
|
811
819
|
cluster_id=self.cluster_name,
|
|
812
820
|
project_id=project_id,
|
|
813
821
|
region=self.region,
|
|
@@ -908,7 +916,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
|
|
|
908
916
|
cluster_state = event["cluster_state"]
|
|
909
917
|
cluster_name = event["cluster_name"]
|
|
910
918
|
|
|
911
|
-
if cluster_state == ClusterStatus.State.
|
|
919
|
+
if cluster_state == ClusterStatus.State(ClusterStatus.State.DELETING).name:
|
|
912
920
|
raise AirflowException(f"Cluster is in ERROR state:\n{cluster_name}")
|
|
913
921
|
|
|
914
922
|
self.log.info("%s completed successfully.", self.task_id)
|
|
@@ -945,7 +953,13 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
|
|
|
945
953
|
:param polling_interval_seconds: Time (seconds) to wait between calls to check the cluster status.
|
|
946
954
|
"""
|
|
947
955
|
|
|
948
|
-
template_fields: Sequence[str] = (
|
|
956
|
+
template_fields: Sequence[str] = (
|
|
957
|
+
"project_id",
|
|
958
|
+
"region",
|
|
959
|
+
"cluster_name",
|
|
960
|
+
"gcp_conn_id",
|
|
961
|
+
"impersonation_chain",
|
|
962
|
+
)
|
|
949
963
|
|
|
950
964
|
def __init__(
|
|
951
965
|
self,
|
|
@@ -1071,6 +1085,7 @@ class _DataprocStartStopClusterBaseOperator(GoogleCloudBaseOperator):
|
|
|
1071
1085
|
"region",
|
|
1072
1086
|
"project_id",
|
|
1073
1087
|
"request_id",
|
|
1088
|
+
"gcp_conn_id",
|
|
1074
1089
|
"impersonation_chain",
|
|
1075
1090
|
)
|
|
1076
1091
|
|
|
@@ -1174,7 +1189,6 @@ class DataprocStartClusterOperator(_DataprocStartStopClusterBaseOperator):
|
|
|
1174
1189
|
cluster = super().execute(context)
|
|
1175
1190
|
DataprocClusterLink.persist(
|
|
1176
1191
|
context=context,
|
|
1177
|
-
operator=self,
|
|
1178
1192
|
cluster_id=self.cluster_name,
|
|
1179
1193
|
project_id=self._get_project_id(),
|
|
1180
1194
|
region=self.region,
|
|
@@ -1355,7 +1369,11 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
|
|
|
1355
1369
|
self.log.info("Job %s submitted successfully.", job_id)
|
|
1356
1370
|
# Save data required for extra links no matter what the job status will be
|
|
1357
1371
|
DataprocLink.persist(
|
|
1358
|
-
context=context,
|
|
1372
|
+
context=context,
|
|
1373
|
+
url=DATAPROC_JOB_LINK_DEPRECATED,
|
|
1374
|
+
resource=job_id,
|
|
1375
|
+
region=self.region,
|
|
1376
|
+
project_id=self.project_id,
|
|
1359
1377
|
)
|
|
1360
1378
|
|
|
1361
1379
|
if self.deferrable:
|
|
@@ -1413,7 +1431,7 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
|
|
|
1413
1431
|
:param metadata: Additional metadata that is provided to the method.
|
|
1414
1432
|
"""
|
|
1415
1433
|
|
|
1416
|
-
template_fields: Sequence[str] = ("region", "template")
|
|
1434
|
+
template_fields: Sequence[str] = ("region", "template", "gcp_conn_id")
|
|
1417
1435
|
template_fields_renderers = {"template": "json"}
|
|
1418
1436
|
operator_extra_links = (DataprocWorkflowTemplateLink(),)
|
|
1419
1437
|
|
|
@@ -1459,7 +1477,6 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
|
|
|
1459
1477
|
if project_id:
|
|
1460
1478
|
DataprocWorkflowTemplateLink.persist(
|
|
1461
1479
|
context=context,
|
|
1462
|
-
operator=self,
|
|
1463
1480
|
workflow_template_id=self.template["id"],
|
|
1464
1481
|
region=self.region,
|
|
1465
1482
|
project_id=project_id,
|
|
@@ -1508,7 +1525,13 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
|
|
|
1508
1525
|
:param cancel_on_kill: Flag which indicates whether cancel the workflow, when on_kill is called
|
|
1509
1526
|
"""
|
|
1510
1527
|
|
|
1511
|
-
template_fields: Sequence[str] = (
|
|
1528
|
+
template_fields: Sequence[str] = (
|
|
1529
|
+
"template_id",
|
|
1530
|
+
"gcp_conn_id",
|
|
1531
|
+
"impersonation_chain",
|
|
1532
|
+
"request_id",
|
|
1533
|
+
"parameters",
|
|
1534
|
+
)
|
|
1512
1535
|
template_fields_renderers = {"parameters": "json"}
|
|
1513
1536
|
operator_extra_links = (DataprocWorkflowLink(),)
|
|
1514
1537
|
|
|
@@ -1571,7 +1594,6 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
|
|
|
1571
1594
|
if project_id:
|
|
1572
1595
|
DataprocWorkflowLink.persist(
|
|
1573
1596
|
context=context,
|
|
1574
|
-
operator=self,
|
|
1575
1597
|
workflow_id=workflow_id,
|
|
1576
1598
|
region=self.region,
|
|
1577
1599
|
project_id=project_id,
|
|
@@ -1657,7 +1679,7 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
|
|
|
1657
1679
|
:param cancel_on_kill: Flag which indicates whether cancel the workflow, when on_kill is called
|
|
1658
1680
|
"""
|
|
1659
1681
|
|
|
1660
|
-
template_fields: Sequence[str] = ("template", "impersonation_chain")
|
|
1682
|
+
template_fields: Sequence[str] = ("template", "gcp_conn_id", "impersonation_chain")
|
|
1661
1683
|
template_fields_renderers = {"template": "json"}
|
|
1662
1684
|
operator_extra_links = (DataprocWorkflowLink(),)
|
|
1663
1685
|
|
|
@@ -1727,7 +1749,6 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
|
|
|
1727
1749
|
if project_id:
|
|
1728
1750
|
DataprocWorkflowLink.persist(
|
|
1729
1751
|
context=context,
|
|
1730
|
-
operator=self,
|
|
1731
1752
|
workflow_id=workflow_id,
|
|
1732
1753
|
region=self.region,
|
|
1733
1754
|
project_id=project_id,
|
|
@@ -1826,7 +1847,14 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
|
1826
1847
|
:param wait_timeout: How many seconds wait for job to be ready. Used only if ``asynchronous`` is False
|
|
1827
1848
|
"""
|
|
1828
1849
|
|
|
1829
|
-
template_fields: Sequence[str] = (
|
|
1850
|
+
template_fields: Sequence[str] = (
|
|
1851
|
+
"project_id",
|
|
1852
|
+
"region",
|
|
1853
|
+
"job",
|
|
1854
|
+
"gcp_conn_id",
|
|
1855
|
+
"impersonation_chain",
|
|
1856
|
+
"request_id",
|
|
1857
|
+
)
|
|
1830
1858
|
template_fields_renderers = {"job": "json"}
|
|
1831
1859
|
|
|
1832
1860
|
operator_extra_links = (DataprocJobLink(),)
|
|
@@ -1901,7 +1929,6 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
|
1901
1929
|
if project_id:
|
|
1902
1930
|
DataprocJobLink.persist(
|
|
1903
1931
|
context=context,
|
|
1904
|
-
operator=self,
|
|
1905
1932
|
job_id=new_job_id,
|
|
1906
1933
|
region=self.region,
|
|
1907
1934
|
project_id=project_id,
|
|
@@ -1947,9 +1974,9 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
|
1947
1974
|
job_state = event["job_state"]
|
|
1948
1975
|
job_id = event["job_id"]
|
|
1949
1976
|
job = event["job"]
|
|
1950
|
-
if job_state == JobStatus.State.ERROR:
|
|
1977
|
+
if job_state == JobStatus.State.ERROR.name: # type: ignore
|
|
1951
1978
|
raise AirflowException(f"Job {job_id} failed:\n{job}")
|
|
1952
|
-
if job_state == JobStatus.State.CANCELLED:
|
|
1979
|
+
if job_state == JobStatus.State.CANCELLED.name: # type: ignore
|
|
1953
1980
|
raise AirflowException(f"Job {job_id} was cancelled:\n{job}")
|
|
1954
1981
|
self.log.info("%s completed successfully.", self.task_id)
|
|
1955
1982
|
return job_id
|
|
@@ -2026,6 +2053,7 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
|
|
|
2026
2053
|
"region",
|
|
2027
2054
|
"request_id",
|
|
2028
2055
|
"project_id",
|
|
2056
|
+
"gcp_conn_id",
|
|
2029
2057
|
"impersonation_chain",
|
|
2030
2058
|
)
|
|
2031
2059
|
operator_extra_links = (DataprocClusterLink(),)
|
|
@@ -2074,7 +2102,6 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
|
|
|
2074
2102
|
if project_id:
|
|
2075
2103
|
DataprocClusterLink.persist(
|
|
2076
2104
|
context=context,
|
|
2077
|
-
operator=self,
|
|
2078
2105
|
cluster_id=self.cluster_name,
|
|
2079
2106
|
project_id=project_id,
|
|
2080
2107
|
region=self.region,
|
|
@@ -2162,6 +2189,7 @@ class DataprocDiagnoseClusterOperator(GoogleCloudBaseOperator):
|
|
|
2162
2189
|
"project_id",
|
|
2163
2190
|
"region",
|
|
2164
2191
|
"cluster_name",
|
|
2192
|
+
"gcp_conn_id",
|
|
2165
2193
|
"impersonation_chain",
|
|
2166
2194
|
"tarball_gcs_dir",
|
|
2167
2195
|
"diagnosis_interval",
|
|
@@ -2308,6 +2336,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2308
2336
|
"batch",
|
|
2309
2337
|
"batch_id",
|
|
2310
2338
|
"region",
|
|
2339
|
+
"gcp_conn_id",
|
|
2311
2340
|
"impersonation_chain",
|
|
2312
2341
|
)
|
|
2313
2342
|
operator_extra_links = (DataprocBatchLink(),)
|
|
@@ -2373,7 +2402,6 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2373
2402
|
# Persist the link earlier so users can observe the progress
|
|
2374
2403
|
DataprocBatchLink.persist(
|
|
2375
2404
|
context=context,
|
|
2376
|
-
operator=self,
|
|
2377
2405
|
project_id=self.project_id,
|
|
2378
2406
|
region=self.region,
|
|
2379
2407
|
batch_id=self.batch_id,
|
|
@@ -2410,7 +2438,6 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2410
2438
|
|
|
2411
2439
|
DataprocBatchLink.persist(
|
|
2412
2440
|
context=context,
|
|
2413
|
-
operator=self,
|
|
2414
2441
|
project_id=self.project_id,
|
|
2415
2442
|
region=self.region,
|
|
2416
2443
|
batch_id=batch_id,
|
|
@@ -2460,7 +2487,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2460
2487
|
if not self.hook.check_error_for_resource_is_not_ready_msg(batch.state_message):
|
|
2461
2488
|
break
|
|
2462
2489
|
|
|
2463
|
-
self.handle_batch_status(context, batch.state, batch_id, batch.state_message)
|
|
2490
|
+
self.handle_batch_status(context, batch.state.name, batch_id, batch.state_message)
|
|
2464
2491
|
return Batch.to_dict(batch)
|
|
2465
2492
|
|
|
2466
2493
|
@cached_property
|
|
@@ -2485,21 +2512,21 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2485
2512
|
self.operation.cancel()
|
|
2486
2513
|
|
|
2487
2514
|
def handle_batch_status(
|
|
2488
|
-
self, context: Context, state:
|
|
2515
|
+
self, context: Context, state: str, batch_id: str, state_message: str | None = None
|
|
2489
2516
|
) -> None:
|
|
2490
2517
|
# The existing batch may be a number of states other than 'SUCCEEDED'\
|
|
2491
2518
|
# wait_for_operation doesn't fail if the job is cancelled, so we will check for it here which also
|
|
2492
2519
|
# finds a cancelling|canceled|unspecified job from wait_for_batch or the deferred trigger
|
|
2493
2520
|
link = DATAPROC_BATCH_LINK.format(region=self.region, project_id=self.project_id, batch_id=batch_id)
|
|
2494
|
-
if state == Batch.State.FAILED:
|
|
2521
|
+
if state == Batch.State.FAILED.name: # type: ignore
|
|
2495
2522
|
raise AirflowException(
|
|
2496
|
-
f"Batch job {batch_id} failed with error: {state_message}
|
|
2523
|
+
f"Batch job {batch_id} failed with error: {state_message}.\nDriver logs: {link}"
|
|
2497
2524
|
)
|
|
2498
|
-
if state in (Batch.State.CANCELLED, Batch.State.CANCELLING):
|
|
2499
|
-
raise AirflowException(f"Batch job {batch_id} was cancelled
|
|
2500
|
-
if state == Batch.State.STATE_UNSPECIFIED:
|
|
2501
|
-
raise AirflowException(f"Batch job {batch_id} unspecified
|
|
2502
|
-
self.log.info("Batch job %s completed
|
|
2525
|
+
if state in (Batch.State.CANCELLED.name, Batch.State.CANCELLING.name): # type: ignore
|
|
2526
|
+
raise AirflowException(f"Batch job {batch_id} was cancelled.\nDriver logs: {link}")
|
|
2527
|
+
if state == Batch.State.STATE_UNSPECIFIED.name: # type: ignore
|
|
2528
|
+
raise AirflowException(f"Batch job {batch_id} unspecified.\nDriver logs: {link}")
|
|
2529
|
+
self.log.info("Batch job %s completed.\nDriver logs: %s", batch_id, link)
|
|
2503
2530
|
|
|
2504
2531
|
def retry_batch_creation(
|
|
2505
2532
|
self,
|
|
@@ -2571,7 +2598,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2571
2598
|
dag_id = re.sub(r"[.\s]", "_", self.dag_id.lower())
|
|
2572
2599
|
task_id = re.sub(r"[.\s]", "_", self.task_id.lower())
|
|
2573
2600
|
|
|
2574
|
-
labels_regex = re.compile(r"^[a-z][\w-]{0,
|
|
2601
|
+
labels_regex = re.compile(r"^[a-z][\w-]{0,62}$")
|
|
2575
2602
|
if not labels_regex.match(dag_id) or not labels_regex.match(task_id):
|
|
2576
2603
|
return
|
|
2577
2604
|
|
|
@@ -2618,7 +2645,13 @@ class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
|
|
|
2618
2645
|
account from the list granting this role to the originating account (templated).
|
|
2619
2646
|
"""
|
|
2620
2647
|
|
|
2621
|
-
template_fields: Sequence[str] = (
|
|
2648
|
+
template_fields: Sequence[str] = (
|
|
2649
|
+
"batch_id",
|
|
2650
|
+
"region",
|
|
2651
|
+
"project_id",
|
|
2652
|
+
"gcp_conn_id",
|
|
2653
|
+
"impersonation_chain",
|
|
2654
|
+
)
|
|
2622
2655
|
|
|
2623
2656
|
def __init__(
|
|
2624
2657
|
self,
|
|
@@ -2682,7 +2715,13 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
|
|
|
2682
2715
|
account from the list granting this role to the originating account (templated).
|
|
2683
2716
|
"""
|
|
2684
2717
|
|
|
2685
|
-
template_fields: Sequence[str] = (
|
|
2718
|
+
template_fields: Sequence[str] = (
|
|
2719
|
+
"batch_id",
|
|
2720
|
+
"region",
|
|
2721
|
+
"project_id",
|
|
2722
|
+
"gcp_conn_id",
|
|
2723
|
+
"impersonation_chain",
|
|
2724
|
+
)
|
|
2686
2725
|
operator_extra_links = (DataprocBatchLink(),)
|
|
2687
2726
|
|
|
2688
2727
|
def __init__(
|
|
@@ -2723,7 +2762,6 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
|
|
|
2723
2762
|
if project_id:
|
|
2724
2763
|
DataprocBatchLink.persist(
|
|
2725
2764
|
context=context,
|
|
2726
|
-
operator=self,
|
|
2727
2765
|
project_id=project_id,
|
|
2728
2766
|
region=self.region,
|
|
2729
2767
|
batch_id=self.batch_id,
|
|
@@ -2759,7 +2797,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
|
|
|
2759
2797
|
:param order_by: How to order results as specified in ListBatchesRequest
|
|
2760
2798
|
"""
|
|
2761
2799
|
|
|
2762
|
-
template_fields: Sequence[str] = ("region", "project_id", "impersonation_chain")
|
|
2800
|
+
template_fields: Sequence[str] = ("region", "project_id", "gcp_conn_id", "impersonation_chain")
|
|
2763
2801
|
operator_extra_links = (DataprocBatchesListLink(),)
|
|
2764
2802
|
|
|
2765
2803
|
def __init__(
|
|
@@ -2806,7 +2844,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
|
|
|
2806
2844
|
)
|
|
2807
2845
|
project_id = self.project_id or hook.project_id
|
|
2808
2846
|
if project_id:
|
|
2809
|
-
DataprocBatchesListLink.persist(context=context,
|
|
2847
|
+
DataprocBatchesListLink.persist(context=context, project_id=project_id)
|
|
2810
2848
|
return [Batch.to_dict(result) for result in results]
|
|
2811
2849
|
|
|
2812
2850
|
|
|
@@ -2833,7 +2871,13 @@ class DataprocCancelOperationOperator(GoogleCloudBaseOperator):
|
|
|
2833
2871
|
account from the list granting this role to the originating account (templated).
|
|
2834
2872
|
"""
|
|
2835
2873
|
|
|
2836
|
-
template_fields: Sequence[str] = (
|
|
2874
|
+
template_fields: Sequence[str] = (
|
|
2875
|
+
"operation_name",
|
|
2876
|
+
"region",
|
|
2877
|
+
"project_id",
|
|
2878
|
+
"gcp_conn_id",
|
|
2879
|
+
"impersonation_chain",
|
|
2880
|
+
)
|
|
2837
2881
|
|
|
2838
2882
|
def __init__(
|
|
2839
2883
|
self,
|