apache-airflow-providers-google 14.0.0__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/3rd-party-licenses/LICENSES.txt +14 -0
- airflow/providers/google/3rd-party-licenses/NOTICE +5 -0
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/_vendor/__init__.py +0 -0
- airflow/providers/google/_vendor/json_merge_patch.py +91 -0
- airflow/providers/google/ads/hooks/ads.py +52 -43
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +3 -19
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +3 -2
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/alloy_db.py +2 -3
- airflow/providers/google/cloud/hooks/bigquery.py +195 -318
- airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
- airflow/providers/google/cloud/hooks/bigtable.py +3 -2
- airflow/providers/google/cloud/hooks/cloud_batch.py +8 -9
- airflow/providers/google/cloud/hooks/cloud_build.py +6 -65
- airflow/providers/google/cloud/hooks/cloud_composer.py +292 -24
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +4 -3
- airflow/providers/google/cloud/hooks/cloud_run.py +20 -11
- airflow/providers/google/cloud/hooks/cloud_sql.py +136 -64
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +35 -15
- airflow/providers/google/cloud/hooks/compute.py +7 -6
- airflow/providers/google/cloud/hooks/compute_ssh.py +7 -4
- airflow/providers/google/cloud/hooks/datacatalog.py +12 -3
- airflow/providers/google/cloud/hooks/dataflow.py +87 -242
- airflow/providers/google/cloud/hooks/dataform.py +9 -14
- airflow/providers/google/cloud/hooks/datafusion.py +7 -9
- airflow/providers/google/cloud/hooks/dataplex.py +13 -12
- airflow/providers/google/cloud/hooks/dataprep.py +2 -2
- airflow/providers/google/cloud/hooks/dataproc.py +76 -74
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +4 -3
- airflow/providers/google/cloud/hooks/dlp.py +5 -4
- airflow/providers/google/cloud/hooks/gcs.py +144 -33
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kms.py +3 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -17
- airflow/providers/google/cloud/hooks/looker.py +6 -1
- airflow/providers/google/cloud/hooks/managed_kafka.py +227 -3
- airflow/providers/google/cloud/hooks/mlengine.py +7 -8
- airflow/providers/google/cloud/hooks/natural_language.py +3 -2
- airflow/providers/google/cloud/hooks/os_login.py +3 -2
- airflow/providers/google/cloud/hooks/pubsub.py +6 -6
- airflow/providers/google/cloud/hooks/secret_manager.py +105 -12
- airflow/providers/google/cloud/hooks/spanner.py +75 -10
- airflow/providers/google/cloud/hooks/speech_to_text.py +3 -2
- airflow/providers/google/cloud/hooks/stackdriver.py +18 -18
- airflow/providers/google/cloud/hooks/tasks.py +4 -3
- airflow/providers/google/cloud/hooks/text_to_speech.py +3 -2
- airflow/providers/google/cloud/hooks/translate.py +8 -17
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -222
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +9 -15
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +33 -283
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +5 -12
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +6 -12
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +311 -10
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +7 -13
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +8 -12
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +6 -12
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +3 -2
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/video_intelligence.py +3 -2
- airflow/providers/google/cloud/hooks/vision.py +7 -7
- airflow/providers/google/cloud/hooks/workflows.py +4 -3
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -7
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -46
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -90
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -89
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +11 -61
- airflow/providers/google/cloud/links/managed_kafka.py +11 -51
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +166 -118
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +14 -9
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +141 -40
- airflow/providers/google/cloud/openlineage/mixins.py +14 -13
- airflow/providers/google/cloud/openlineage/utils.py +19 -3
- airflow/providers/google/cloud/operators/alloy_db.py +76 -61
- airflow/providers/google/cloud/operators/bigquery.py +104 -667
- airflow/providers/google/cloud/operators/bigquery_dts.py +12 -12
- airflow/providers/google/cloud/operators/bigtable.py +38 -7
- airflow/providers/google/cloud/operators/cloud_base.py +22 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +18 -18
- airflow/providers/google/cloud/operators/cloud_build.py +80 -36
- airflow/providers/google/cloud/operators/cloud_composer.py +157 -71
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +74 -46
- airflow/providers/google/cloud/operators/cloud_run.py +39 -20
- airflow/providers/google/cloud/operators/cloud_sql.py +46 -61
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -14
- airflow/providers/google/cloud/operators/compute.py +18 -50
- airflow/providers/google/cloud/operators/datacatalog.py +167 -29
- airflow/providers/google/cloud/operators/dataflow.py +38 -15
- airflow/providers/google/cloud/operators/dataform.py +19 -7
- airflow/providers/google/cloud/operators/datafusion.py +43 -43
- airflow/providers/google/cloud/operators/dataplex.py +212 -126
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +134 -207
- airflow/providers/google/cloud/operators/dataproc_metastore.py +102 -84
- airflow/providers/google/cloud/operators/datastore.py +22 -6
- airflow/providers/google/cloud/operators/dlp.py +24 -45
- airflow/providers/google/cloud/operators/functions.py +21 -14
- airflow/providers/google/cloud/operators/gcs.py +15 -12
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +115 -106
- airflow/providers/google/cloud/operators/looker.py +1 -1
- airflow/providers/google/cloud/operators/managed_kafka.py +362 -40
- airflow/providers/google/cloud/operators/natural_language.py +5 -3
- airflow/providers/google/cloud/operators/pubsub.py +69 -21
- airflow/providers/google/cloud/operators/spanner.py +53 -45
- airflow/providers/google/cloud/operators/speech_to_text.py +5 -4
- airflow/providers/google/cloud/operators/stackdriver.py +5 -11
- airflow/providers/google/cloud/operators/tasks.py +6 -15
- airflow/providers/google/cloud/operators/text_to_speech.py +4 -3
- airflow/providers/google/cloud/operators/translate.py +46 -20
- airflow/providers/google/cloud/operators/translate_speech.py +4 -3
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +44 -34
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +34 -12
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +62 -53
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +75 -11
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +48 -12
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +16 -12
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +62 -14
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +35 -10
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +5 -3
- airflow/providers/google/cloud/operators/vision.py +7 -5
- airflow/providers/google/cloud/operators/workflows.py +24 -19
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery.py +2 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -4
- airflow/providers/google/cloud/sensors/bigtable.py +14 -6
- airflow/providers/google/cloud/sensors/cloud_composer.py +535 -33
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -5
- airflow/providers/google/cloud/sensors/dataflow.py +27 -10
- airflow/providers/google/cloud/sensors/dataform.py +2 -2
- airflow/providers/google/cloud/sensors/datafusion.py +4 -4
- airflow/providers/google/cloud/sensors/dataplex.py +7 -5
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +10 -9
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +4 -3
- airflow/providers/google/cloud/sensors/gcs.py +22 -21
- airflow/providers/google/cloud/sensors/looker.py +5 -5
- airflow/providers/google/cloud/sensors/pubsub.py +20 -20
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
- airflow/providers/google/cloud/sensors/workflows.py +6 -4
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +14 -13
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +18 -22
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -5
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +45 -38
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +44 -12
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +36 -14
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +75 -34
- airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_build.py +3 -2
- airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +96 -5
- airflow/providers/google/cloud/triggers/dataflow.py +125 -2
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +16 -3
- airflow/providers/google/cloud/triggers/dataproc.py +124 -53
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +46 -28
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +17 -20
- airflow/providers/google/cloud/triggers/vertex_ai.py +8 -7
- airflow/providers/google/cloud/utils/bigquery.py +5 -7
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +4 -3
- airflow/providers/google/cloud/utils/dataform.py +1 -1
- airflow/providers/google/cloud/utils/external_token_supplier.py +0 -1
- airflow/providers/google/cloud/utils/field_validator.py +1 -2
- airflow/providers/google/cloud/utils/validators.py +43 -0
- airflow/providers/google/common/auth_backend/google_openid.py +26 -9
- airflow/providers/google/common/consts.py +2 -1
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +40 -43
- airflow/providers/google/common/hooks/operation_helpers.py +78 -0
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +4 -5
- airflow/providers/google/firebase/operators/firestore.py +2 -2
- airflow/providers/google/get_provider_info.py +61 -216
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +30 -6
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +3 -2
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +4 -5
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +7 -6
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +4 -64
- airflow/providers/google/suite/hooks/calendar.py +1 -1
- airflow/providers/google/suite/hooks/drive.py +2 -2
- airflow/providers/google/suite/hooks/sheets.py +15 -1
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +117 -72
- apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +1 -1
- apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/example_dags/example_cloud_task.py +0 -54
- airflow/providers/google/cloud/hooks/automl.py +0 -679
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1360
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -1515
- airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
- apache_airflow_providers_google-14.0.0.dist-info/RECORD +0 -318
- /airflow/providers/google/cloud/{example_dags → bundles}/__init__.py +0 -0
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
|
@@ -30,6 +30,11 @@ from enum import Enum
|
|
|
30
30
|
from functools import cached_property
|
|
31
31
|
from typing import TYPE_CHECKING, Any
|
|
32
32
|
|
|
33
|
+
from google.api_core.exceptions import AlreadyExists, NotFound
|
|
34
|
+
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
|
35
|
+
from google.api_core.retry import Retry, exponential_sleep_generator
|
|
36
|
+
from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
|
|
37
|
+
|
|
33
38
|
from airflow.configuration import conf
|
|
34
39
|
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
|
|
35
40
|
from airflow.providers.google.cloud.hooks.dataproc import (
|
|
@@ -39,7 +44,6 @@ from airflow.providers.google.cloud.hooks.dataproc import (
|
|
|
39
44
|
)
|
|
40
45
|
from airflow.providers.google.cloud.links.dataproc import (
|
|
41
46
|
DATAPROC_BATCH_LINK,
|
|
42
|
-
DATAPROC_CLUSTER_LINK_DEPRECATED,
|
|
43
47
|
DATAPROC_JOB_LINK_DEPRECATED,
|
|
44
48
|
DataprocBatchesListLink,
|
|
45
49
|
DataprocBatchLink,
|
|
@@ -58,22 +62,18 @@ from airflow.providers.google.cloud.triggers.dataproc import (
|
|
|
58
62
|
DataprocSubmitTrigger,
|
|
59
63
|
)
|
|
60
64
|
from airflow.providers.google.cloud.utils.dataproc import DataprocOperationType
|
|
61
|
-
from airflow.providers.google.common.deprecated import deprecated
|
|
62
65
|
from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
|
|
63
66
|
from airflow.utils import timezone
|
|
64
|
-
from google.api_core.exceptions import AlreadyExists, NotFound
|
|
65
|
-
from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
|
|
66
|
-
from google.api_core.retry import Retry, exponential_sleep_generator
|
|
67
|
-
from google.cloud.dataproc_v1 import Batch, Cluster, ClusterStatus, JobStatus
|
|
68
67
|
|
|
69
68
|
if TYPE_CHECKING:
|
|
70
|
-
from airflow.utils.context import Context
|
|
71
69
|
from google.api_core import operation
|
|
72
70
|
from google.api_core.retry_async import AsyncRetry
|
|
73
71
|
from google.protobuf.duration_pb2 import Duration
|
|
74
72
|
from google.protobuf.field_mask_pb2 import FieldMask
|
|
75
73
|
from google.type.interval_pb2 import Interval
|
|
76
74
|
|
|
75
|
+
from airflow.providers.common.compat.sdk import Context
|
|
76
|
+
|
|
77
77
|
|
|
78
78
|
class PreemptibilityType(Enum):
|
|
79
79
|
"""Contains possible Type values of Preemptibility applicable for every secondary worker of Cluster."""
|
|
@@ -213,6 +213,7 @@ class ClusterGenerator:
|
|
|
213
213
|
:param secondary_worker_accelerator_type: Type of the accelerator card (GPU) to attach to the secondary workers,
|
|
214
214
|
see https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
|
|
215
215
|
:param secondary_worker_accelerator_count: Number of accelerator cards (GPUs) to attach to the secondary workers
|
|
216
|
+
:param cluster_tier: The tier of the cluster (e.g. "CLUSTER_TIER_STANDARD" / "CLUSTER_TIER_PREMIUM").
|
|
216
217
|
"""
|
|
217
218
|
|
|
218
219
|
def __init__(
|
|
@@ -261,6 +262,8 @@ class ClusterGenerator:
|
|
|
261
262
|
secondary_worker_instance_flexibility_policy: InstanceFlexibilityPolicy | None = None,
|
|
262
263
|
secondary_worker_accelerator_type: str | None = None,
|
|
263
264
|
secondary_worker_accelerator_count: int | None = None,
|
|
265
|
+
*,
|
|
266
|
+
cluster_tier: str | None = None,
|
|
264
267
|
**kwargs,
|
|
265
268
|
) -> None:
|
|
266
269
|
self.project_id = project_id
|
|
@@ -308,6 +311,7 @@ class ClusterGenerator:
|
|
|
308
311
|
self.secondary_worker_instance_flexibility_policy = secondary_worker_instance_flexibility_policy
|
|
309
312
|
self.secondary_worker_accelerator_type = secondary_worker_accelerator_type
|
|
310
313
|
self.secondary_worker_accelerator_count = secondary_worker_accelerator_count
|
|
314
|
+
self.cluster_tier = cluster_tier
|
|
311
315
|
|
|
312
316
|
if self.custom_image and self.image_version:
|
|
313
317
|
raise ValueError("The custom_image and image_version can't be both set")
|
|
@@ -340,7 +344,7 @@ class ClusterGenerator:
|
|
|
340
344
|
unit = match.group(2)
|
|
341
345
|
if unit == "s":
|
|
342
346
|
return {"seconds": val}
|
|
343
|
-
|
|
347
|
+
if unit == "m":
|
|
344
348
|
return {"seconds": int(timedelta(minutes=val).total_seconds())}
|
|
345
349
|
|
|
346
350
|
raise AirflowException(
|
|
@@ -513,6 +517,9 @@ class ClusterGenerator:
|
|
|
513
517
|
if self.driver_pool_size > 0:
|
|
514
518
|
cluster_data["auxiliary_node_groups"] = [self._build_driver_pool()]
|
|
515
519
|
|
|
520
|
+
if self.cluster_tier:
|
|
521
|
+
cluster_data["cluster_tier"] = self.cluster_tier
|
|
522
|
+
|
|
516
523
|
cluster_data = self._build_gce_cluster_config(cluster_data)
|
|
517
524
|
|
|
518
525
|
if self.single_node:
|
|
@@ -621,6 +628,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
|
|
|
621
628
|
"virtual_cluster_config",
|
|
622
629
|
"cluster_name",
|
|
623
630
|
"labels",
|
|
631
|
+
"gcp_conn_id",
|
|
624
632
|
"impersonation_chain",
|
|
625
633
|
)
|
|
626
634
|
template_fields_renderers = {"cluster_config": "json", "virtual_cluster_config": "json"}
|
|
@@ -807,7 +815,6 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
|
|
|
807
815
|
if project_id:
|
|
808
816
|
DataprocClusterLink.persist(
|
|
809
817
|
context=context,
|
|
810
|
-
operator=self,
|
|
811
818
|
cluster_id=self.cluster_name,
|
|
812
819
|
project_id=project_id,
|
|
813
820
|
region=self.region,
|
|
@@ -822,26 +829,24 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
|
|
|
822
829
|
)
|
|
823
830
|
self.log.info("Cluster created.")
|
|
824
831
|
return Cluster.to_dict(cluster)
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
self.
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
method_name="execute_complete",
|
|
844
|
-
)
|
|
832
|
+
cluster = hook.get_cluster(
|
|
833
|
+
project_id=self.project_id, region=self.region, cluster_name=self.cluster_name
|
|
834
|
+
)
|
|
835
|
+
if cluster.status.state == cluster.status.State.RUNNING:
|
|
836
|
+
self.log.info("Cluster created.")
|
|
837
|
+
return Cluster.to_dict(cluster)
|
|
838
|
+
self.defer(
|
|
839
|
+
trigger=DataprocClusterTrigger(
|
|
840
|
+
cluster_name=self.cluster_name,
|
|
841
|
+
project_id=self.project_id,
|
|
842
|
+
region=self.region,
|
|
843
|
+
gcp_conn_id=self.gcp_conn_id,
|
|
844
|
+
impersonation_chain=self.impersonation_chain,
|
|
845
|
+
polling_interval_seconds=self.polling_interval_seconds,
|
|
846
|
+
delete_on_error=self.delete_on_error,
|
|
847
|
+
),
|
|
848
|
+
method_name="execute_complete",
|
|
849
|
+
)
|
|
845
850
|
except AlreadyExists:
|
|
846
851
|
if not self.use_if_exists:
|
|
847
852
|
raise
|
|
@@ -910,152 +915,13 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
|
|
|
910
915
|
cluster_state = event["cluster_state"]
|
|
911
916
|
cluster_name = event["cluster_name"]
|
|
912
917
|
|
|
913
|
-
if cluster_state == ClusterStatus.State.
|
|
918
|
+
if cluster_state == ClusterStatus.State(ClusterStatus.State.DELETING).name:
|
|
914
919
|
raise AirflowException(f"Cluster is in ERROR state:\n{cluster_name}")
|
|
915
920
|
|
|
916
921
|
self.log.info("%s completed successfully.", self.task_id)
|
|
917
922
|
return event["cluster"]
|
|
918
923
|
|
|
919
924
|
|
|
920
|
-
# TODO: Remove one day
|
|
921
|
-
@deprecated(
|
|
922
|
-
planned_removal_date="March 01, 2025",
|
|
923
|
-
use_instead="DataprocUpdateClusterOperator",
|
|
924
|
-
category=AirflowProviderDeprecationWarning,
|
|
925
|
-
)
|
|
926
|
-
class DataprocScaleClusterOperator(GoogleCloudBaseOperator):
|
|
927
|
-
"""
|
|
928
|
-
Scale, up or down, a cluster on Google Cloud Dataproc.
|
|
929
|
-
|
|
930
|
-
The operator will wait until the cluster is re-scaled.
|
|
931
|
-
|
|
932
|
-
Example usage:
|
|
933
|
-
|
|
934
|
-
.. code-block:: python
|
|
935
|
-
|
|
936
|
-
t1 = DataprocClusterScaleOperator(
|
|
937
|
-
task_id="dataproc_scale",
|
|
938
|
-
project_id="my-project",
|
|
939
|
-
cluster_name="cluster-1",
|
|
940
|
-
num_workers=10,
|
|
941
|
-
num_preemptible_workers=10,
|
|
942
|
-
graceful_decommission_timeout="1h",
|
|
943
|
-
)
|
|
944
|
-
|
|
945
|
-
.. seealso::
|
|
946
|
-
For more detail on about scaling clusters have a look at the reference:
|
|
947
|
-
https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/scaling-clusters
|
|
948
|
-
|
|
949
|
-
:param cluster_name: The name of the cluster to scale. (templated)
|
|
950
|
-
:param project_id: The ID of the google cloud project in which
|
|
951
|
-
the cluster runs. (templated)
|
|
952
|
-
:param region: The region for the dataproc cluster. (templated)
|
|
953
|
-
:param num_workers: The new number of workers
|
|
954
|
-
:param num_preemptible_workers: The new number of preemptible workers
|
|
955
|
-
:param graceful_decommission_timeout: Timeout for graceful YARN decommissioning.
|
|
956
|
-
Maximum value is 1d
|
|
957
|
-
:param gcp_conn_id: The connection ID to use connecting to Google Cloud.
|
|
958
|
-
:param impersonation_chain: Optional service account to impersonate using short-term
|
|
959
|
-
credentials, or chained list of accounts required to get the access_token
|
|
960
|
-
of the last account in the list, which will be impersonated in the request.
|
|
961
|
-
If set as a string, the account must grant the originating account
|
|
962
|
-
the Service Account Token Creator IAM role.
|
|
963
|
-
If set as a sequence, the identities from the list must grant
|
|
964
|
-
Service Account Token Creator IAM role to the directly preceding identity, with first
|
|
965
|
-
account from the list granting this role to the originating account (templated).
|
|
966
|
-
"""
|
|
967
|
-
|
|
968
|
-
template_fields: Sequence[str] = ("cluster_name", "project_id", "region", "impersonation_chain")
|
|
969
|
-
|
|
970
|
-
operator_extra_links = (DataprocLink(),)
|
|
971
|
-
|
|
972
|
-
def __init__(
|
|
973
|
-
self,
|
|
974
|
-
*,
|
|
975
|
-
cluster_name: str,
|
|
976
|
-
project_id: str = PROVIDE_PROJECT_ID,
|
|
977
|
-
region: str = "global",
|
|
978
|
-
num_workers: int = 2,
|
|
979
|
-
num_preemptible_workers: int = 0,
|
|
980
|
-
graceful_decommission_timeout: str | None = None,
|
|
981
|
-
gcp_conn_id: str = "google_cloud_default",
|
|
982
|
-
impersonation_chain: str | Sequence[str] | None = None,
|
|
983
|
-
**kwargs,
|
|
984
|
-
) -> None:
|
|
985
|
-
super().__init__(**kwargs)
|
|
986
|
-
self.project_id = project_id
|
|
987
|
-
self.region = region
|
|
988
|
-
self.cluster_name = cluster_name
|
|
989
|
-
self.num_workers = num_workers
|
|
990
|
-
self.num_preemptible_workers = num_preemptible_workers
|
|
991
|
-
self.graceful_decommission_timeout = graceful_decommission_timeout
|
|
992
|
-
self.gcp_conn_id = gcp_conn_id
|
|
993
|
-
self.impersonation_chain = impersonation_chain
|
|
994
|
-
|
|
995
|
-
def _build_scale_cluster_data(self) -> dict:
|
|
996
|
-
scale_data = {
|
|
997
|
-
"config": {
|
|
998
|
-
"worker_config": {"num_instances": self.num_workers},
|
|
999
|
-
"secondary_worker_config": {"num_instances": self.num_preemptible_workers},
|
|
1000
|
-
}
|
|
1001
|
-
}
|
|
1002
|
-
return scale_data
|
|
1003
|
-
|
|
1004
|
-
@property
|
|
1005
|
-
def _graceful_decommission_timeout_object(self) -> dict[str, int] | None:
|
|
1006
|
-
if not self.graceful_decommission_timeout:
|
|
1007
|
-
return None
|
|
1008
|
-
|
|
1009
|
-
timeout = None
|
|
1010
|
-
match = re.fullmatch(r"(\d+)([smdh])", self.graceful_decommission_timeout)
|
|
1011
|
-
if match:
|
|
1012
|
-
val = int(match.group(1))
|
|
1013
|
-
unit = match.group(2)
|
|
1014
|
-
if unit == "s":
|
|
1015
|
-
timeout = val
|
|
1016
|
-
elif unit == "m":
|
|
1017
|
-
timeout = int(timedelta(minutes=val).total_seconds())
|
|
1018
|
-
elif unit == "h":
|
|
1019
|
-
timeout = int(timedelta(hours=val).total_seconds())
|
|
1020
|
-
elif unit == "d":
|
|
1021
|
-
timeout = int(timedelta(days=val).total_seconds())
|
|
1022
|
-
|
|
1023
|
-
if not timeout:
|
|
1024
|
-
raise AirflowException(
|
|
1025
|
-
"DataprocClusterScaleOperator "
|
|
1026
|
-
" should be expressed in day, hours, minutes or seconds. "
|
|
1027
|
-
" i.e. 1d, 4h, 10m, 30s"
|
|
1028
|
-
)
|
|
1029
|
-
|
|
1030
|
-
return {"seconds": timeout}
|
|
1031
|
-
|
|
1032
|
-
def execute(self, context: Context) -> None:
|
|
1033
|
-
"""Scale, up or down, a cluster on Google Cloud Dataproc."""
|
|
1034
|
-
self.log.info("Scaling cluster: %s", self.cluster_name)
|
|
1035
|
-
|
|
1036
|
-
scaling_cluster_data = self._build_scale_cluster_data()
|
|
1037
|
-
update_mask = ["config.worker_config.num_instances", "config.secondary_worker_config.num_instances"]
|
|
1038
|
-
|
|
1039
|
-
hook = DataprocHook(gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain)
|
|
1040
|
-
# Save data required to display extra link no matter what the cluster status will be
|
|
1041
|
-
DataprocLink.persist(
|
|
1042
|
-
context=context,
|
|
1043
|
-
task_instance=self,
|
|
1044
|
-
url=DATAPROC_CLUSTER_LINK_DEPRECATED,
|
|
1045
|
-
resource=self.cluster_name,
|
|
1046
|
-
)
|
|
1047
|
-
operation = hook.update_cluster(
|
|
1048
|
-
project_id=self.project_id,
|
|
1049
|
-
region=self.region,
|
|
1050
|
-
cluster_name=self.cluster_name,
|
|
1051
|
-
cluster=scaling_cluster_data,
|
|
1052
|
-
graceful_decommission_timeout=self._graceful_decommission_timeout_object,
|
|
1053
|
-
update_mask={"paths": update_mask},
|
|
1054
|
-
)
|
|
1055
|
-
operation.result()
|
|
1056
|
-
self.log.info("Cluster scaling finished")
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
925
|
class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
|
|
1060
926
|
"""
|
|
1061
927
|
Delete a cluster in a project.
|
|
@@ -1086,7 +952,13 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
|
|
|
1086
952
|
:param polling_interval_seconds: Time (seconds) to wait between calls to check the cluster status.
|
|
1087
953
|
"""
|
|
1088
954
|
|
|
1089
|
-
template_fields: Sequence[str] = (
|
|
955
|
+
template_fields: Sequence[str] = (
|
|
956
|
+
"project_id",
|
|
957
|
+
"region",
|
|
958
|
+
"cluster_name",
|
|
959
|
+
"gcp_conn_id",
|
|
960
|
+
"impersonation_chain",
|
|
961
|
+
)
|
|
1090
962
|
|
|
1091
963
|
def __init__(
|
|
1092
964
|
self,
|
|
@@ -1161,7 +1033,7 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
|
|
|
1161
1033
|
"""
|
|
1162
1034
|
if event and event["status"] == "error":
|
|
1163
1035
|
raise AirflowException(event["message"])
|
|
1164
|
-
|
|
1036
|
+
if event is None:
|
|
1165
1037
|
raise AirflowException("No event received in trigger callback")
|
|
1166
1038
|
self.log.info("Cluster deleted.")
|
|
1167
1039
|
|
|
@@ -1212,6 +1084,7 @@ class _DataprocStartStopClusterBaseOperator(GoogleCloudBaseOperator):
|
|
|
1212
1084
|
"region",
|
|
1213
1085
|
"project_id",
|
|
1214
1086
|
"request_id",
|
|
1087
|
+
"gcp_conn_id",
|
|
1215
1088
|
"impersonation_chain",
|
|
1216
1089
|
)
|
|
1217
1090
|
|
|
@@ -1315,7 +1188,6 @@ class DataprocStartClusterOperator(_DataprocStartStopClusterBaseOperator):
|
|
|
1315
1188
|
cluster = super().execute(context)
|
|
1316
1189
|
DataprocClusterLink.persist(
|
|
1317
1190
|
context=context,
|
|
1318
|
-
operator=self,
|
|
1319
1191
|
cluster_id=self.cluster_name,
|
|
1320
1192
|
project_id=self._get_project_id(),
|
|
1321
1193
|
region=self.region,
|
|
@@ -1461,8 +1333,7 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
|
|
|
1461
1333
|
"""Initialize `self.job_template` with default values."""
|
|
1462
1334
|
if self.project_id is None:
|
|
1463
1335
|
raise AirflowException(
|
|
1464
|
-
"project id should either be set via project_id "
|
|
1465
|
-
"parameter or retrieved from the connection,"
|
|
1336
|
+
"project id should either be set via project_id parameter or retrieved from the connection,"
|
|
1466
1337
|
)
|
|
1467
1338
|
job_template = DataProcJobBuilder(
|
|
1468
1339
|
project_id=self.project_id,
|
|
@@ -1497,7 +1368,11 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
|
|
|
1497
1368
|
self.log.info("Job %s submitted successfully.", job_id)
|
|
1498
1369
|
# Save data required for extra links no matter what the job status will be
|
|
1499
1370
|
DataprocLink.persist(
|
|
1500
|
-
context=context,
|
|
1371
|
+
context=context,
|
|
1372
|
+
url=DATAPROC_JOB_LINK_DEPRECATED,
|
|
1373
|
+
resource=job_id,
|
|
1374
|
+
region=self.region,
|
|
1375
|
+
project_id=self.project_id,
|
|
1501
1376
|
)
|
|
1502
1377
|
|
|
1503
1378
|
if self.deferrable:
|
|
@@ -1517,8 +1392,7 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
|
|
|
1517
1392
|
self.hook.wait_for_job(job_id=job_id, region=self.region, project_id=self.project_id)
|
|
1518
1393
|
self.log.info("Job %s completed successfully.", job_id)
|
|
1519
1394
|
return job_id
|
|
1520
|
-
|
|
1521
|
-
raise AirflowException("Create a job template before")
|
|
1395
|
+
raise AirflowException("Create a job template before")
|
|
1522
1396
|
|
|
1523
1397
|
def execute_complete(self, context, event=None) -> None:
|
|
1524
1398
|
"""
|
|
@@ -1556,7 +1430,7 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
|
|
|
1556
1430
|
:param metadata: Additional metadata that is provided to the method.
|
|
1557
1431
|
"""
|
|
1558
1432
|
|
|
1559
|
-
template_fields: Sequence[str] = ("region", "template")
|
|
1433
|
+
template_fields: Sequence[str] = ("region", "template", "gcp_conn_id")
|
|
1560
1434
|
template_fields_renderers = {"template": "json"}
|
|
1561
1435
|
operator_extra_links = (DataprocWorkflowTemplateLink(),)
|
|
1562
1436
|
|
|
@@ -1602,7 +1476,6 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
|
|
|
1602
1476
|
if project_id:
|
|
1603
1477
|
DataprocWorkflowTemplateLink.persist(
|
|
1604
1478
|
context=context,
|
|
1605
|
-
operator=self,
|
|
1606
1479
|
workflow_template_id=self.template["id"],
|
|
1607
1480
|
region=self.region,
|
|
1608
1481
|
project_id=project_id,
|
|
@@ -1651,7 +1524,13 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
|
|
|
1651
1524
|
:param cancel_on_kill: Flag which indicates whether cancel the workflow, when on_kill is called
|
|
1652
1525
|
"""
|
|
1653
1526
|
|
|
1654
|
-
template_fields: Sequence[str] = (
|
|
1527
|
+
template_fields: Sequence[str] = (
|
|
1528
|
+
"template_id",
|
|
1529
|
+
"gcp_conn_id",
|
|
1530
|
+
"impersonation_chain",
|
|
1531
|
+
"request_id",
|
|
1532
|
+
"parameters",
|
|
1533
|
+
)
|
|
1655
1534
|
template_fields_renderers = {"parameters": "json"}
|
|
1656
1535
|
operator_extra_links = (DataprocWorkflowLink(),)
|
|
1657
1536
|
|
|
@@ -1714,7 +1593,6 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
|
|
|
1714
1593
|
if project_id:
|
|
1715
1594
|
DataprocWorkflowLink.persist(
|
|
1716
1595
|
context=context,
|
|
1717
|
-
operator=self,
|
|
1718
1596
|
workflow_id=workflow_id,
|
|
1719
1597
|
region=self.region,
|
|
1720
1598
|
project_id=project_id,
|
|
@@ -1800,7 +1678,7 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
|
|
|
1800
1678
|
:param cancel_on_kill: Flag which indicates whether cancel the workflow, when on_kill is called
|
|
1801
1679
|
"""
|
|
1802
1680
|
|
|
1803
|
-
template_fields: Sequence[str] = ("template", "impersonation_chain")
|
|
1681
|
+
template_fields: Sequence[str] = ("template", "gcp_conn_id", "impersonation_chain")
|
|
1804
1682
|
template_fields_renderers = {"template": "json"}
|
|
1805
1683
|
operator_extra_links = (DataprocWorkflowLink(),)
|
|
1806
1684
|
|
|
@@ -1870,7 +1748,6 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
|
|
|
1870
1748
|
if project_id:
|
|
1871
1749
|
DataprocWorkflowLink.persist(
|
|
1872
1750
|
context=context,
|
|
1873
|
-
operator=self,
|
|
1874
1751
|
workflow_id=workflow_id,
|
|
1875
1752
|
region=self.region,
|
|
1876
1753
|
project_id=project_id,
|
|
@@ -1969,7 +1846,14 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
|
1969
1846
|
:param wait_timeout: How many seconds wait for job to be ready. Used only if ``asynchronous`` is False
|
|
1970
1847
|
"""
|
|
1971
1848
|
|
|
1972
|
-
template_fields: Sequence[str] = (
|
|
1849
|
+
template_fields: Sequence[str] = (
|
|
1850
|
+
"project_id",
|
|
1851
|
+
"region",
|
|
1852
|
+
"job",
|
|
1853
|
+
"gcp_conn_id",
|
|
1854
|
+
"impersonation_chain",
|
|
1855
|
+
"request_id",
|
|
1856
|
+
)
|
|
1973
1857
|
template_fields_renderers = {"job": "json"}
|
|
1974
1858
|
|
|
1975
1859
|
operator_extra_links = (DataprocJobLink(),)
|
|
@@ -2044,7 +1928,6 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
|
2044
1928
|
if project_id:
|
|
2045
1929
|
DataprocJobLink.persist(
|
|
2046
1930
|
context=context,
|
|
2047
|
-
operator=self,
|
|
2048
1931
|
job_id=new_job_id,
|
|
2049
1932
|
region=self.region,
|
|
2050
1933
|
project_id=project_id,
|
|
@@ -2056,9 +1939,9 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
|
2056
1939
|
state = job.status.state
|
|
2057
1940
|
if state == JobStatus.State.DONE:
|
|
2058
1941
|
return self.job_id
|
|
2059
|
-
|
|
1942
|
+
if state == JobStatus.State.ERROR:
|
|
2060
1943
|
raise AirflowException(f"Job failed:\n{job}")
|
|
2061
|
-
|
|
1944
|
+
if state == JobStatus.State.CANCELLED:
|
|
2062
1945
|
raise AirflowException(f"Job was cancelled:\n{job}")
|
|
2063
1946
|
self.defer(
|
|
2064
1947
|
trigger=DataprocSubmitTrigger(
|
|
@@ -2090,9 +1973,9 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
|
2090
1973
|
job_state = event["job_state"]
|
|
2091
1974
|
job_id = event["job_id"]
|
|
2092
1975
|
job = event["job"]
|
|
2093
|
-
if job_state == JobStatus.State.ERROR:
|
|
1976
|
+
if job_state == JobStatus.State.ERROR.name: # type: ignore
|
|
2094
1977
|
raise AirflowException(f"Job {job_id} failed:\n{job}")
|
|
2095
|
-
if job_state == JobStatus.State.CANCELLED:
|
|
1978
|
+
if job_state == JobStatus.State.CANCELLED.name: # type: ignore
|
|
2096
1979
|
raise AirflowException(f"Job {job_id} was cancelled:\n{job}")
|
|
2097
1980
|
self.log.info("%s completed successfully.", self.task_id)
|
|
2098
1981
|
return job_id
|
|
@@ -2169,6 +2052,7 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
|
|
|
2169
2052
|
"region",
|
|
2170
2053
|
"request_id",
|
|
2171
2054
|
"project_id",
|
|
2055
|
+
"gcp_conn_id",
|
|
2172
2056
|
"impersonation_chain",
|
|
2173
2057
|
)
|
|
2174
2058
|
operator_extra_links = (DataprocClusterLink(),)
|
|
@@ -2217,7 +2101,6 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
|
|
|
2217
2101
|
if project_id:
|
|
2218
2102
|
DataprocClusterLink.persist(
|
|
2219
2103
|
context=context,
|
|
2220
|
-
operator=self,
|
|
2221
2104
|
cluster_id=self.cluster_name,
|
|
2222
2105
|
project_id=project_id,
|
|
2223
2106
|
region=self.region,
|
|
@@ -2305,6 +2188,7 @@ class DataprocDiagnoseClusterOperator(GoogleCloudBaseOperator):
|
|
|
2305
2188
|
"project_id",
|
|
2306
2189
|
"region",
|
|
2307
2190
|
"cluster_name",
|
|
2191
|
+
"gcp_conn_id",
|
|
2308
2192
|
"impersonation_chain",
|
|
2309
2193
|
"tarball_gcs_dir",
|
|
2310
2194
|
"diagnosis_interval",
|
|
@@ -2451,6 +2335,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2451
2335
|
"batch",
|
|
2452
2336
|
"batch_id",
|
|
2453
2337
|
"region",
|
|
2338
|
+
"gcp_conn_id",
|
|
2454
2339
|
"impersonation_chain",
|
|
2455
2340
|
)
|
|
2456
2341
|
operator_extra_links = (DataprocBatchLink(),)
|
|
@@ -2516,7 +2401,6 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2516
2401
|
# Persist the link earlier so users can observe the progress
|
|
2517
2402
|
DataprocBatchLink.persist(
|
|
2518
2403
|
context=context,
|
|
2519
|
-
operator=self,
|
|
2520
2404
|
project_id=self.project_id,
|
|
2521
2405
|
region=self.region,
|
|
2522
2406
|
batch_id=self.batch_id,
|
|
@@ -2528,6 +2412,8 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2528
2412
|
self.log.info("Automatic injection of OpenLineage information into Spark properties is enabled.")
|
|
2529
2413
|
self._inject_openlineage_properties_into_dataproc_batch(context)
|
|
2530
2414
|
|
|
2415
|
+
self.__update_batch_labels()
|
|
2416
|
+
|
|
2531
2417
|
try:
|
|
2532
2418
|
self.operation = self.hook.create_batch(
|
|
2533
2419
|
region=self.region,
|
|
@@ -2551,7 +2437,6 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2551
2437
|
|
|
2552
2438
|
DataprocBatchLink.persist(
|
|
2553
2439
|
context=context,
|
|
2554
|
-
operator=self,
|
|
2555
2440
|
project_id=self.project_id,
|
|
2556
2441
|
region=self.region,
|
|
2557
2442
|
batch_id=batch_id,
|
|
@@ -2601,7 +2486,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2601
2486
|
if not self.hook.check_error_for_resource_is_not_ready_msg(batch.state_message):
|
|
2602
2487
|
break
|
|
2603
2488
|
|
|
2604
|
-
self.handle_batch_status(context, batch.state, batch_id, batch.state_message)
|
|
2489
|
+
self.handle_batch_status(context, batch.state.name, batch_id, batch.state_message)
|
|
2605
2490
|
return Batch.to_dict(batch)
|
|
2606
2491
|
|
|
2607
2492
|
@cached_property
|
|
@@ -2626,21 +2511,21 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2626
2511
|
self.operation.cancel()
|
|
2627
2512
|
|
|
2628
2513
|
def handle_batch_status(
|
|
2629
|
-
self, context: Context, state:
|
|
2514
|
+
self, context: Context, state: str, batch_id: str, state_message: str | None = None
|
|
2630
2515
|
) -> None:
|
|
2631
2516
|
# The existing batch may be a number of states other than 'SUCCEEDED'\
|
|
2632
2517
|
# wait_for_operation doesn't fail if the job is cancelled, so we will check for it here which also
|
|
2633
2518
|
# finds a cancelling|canceled|unspecified job from wait_for_batch or the deferred trigger
|
|
2634
2519
|
link = DATAPROC_BATCH_LINK.format(region=self.region, project_id=self.project_id, batch_id=batch_id)
|
|
2635
|
-
if state == Batch.State.FAILED:
|
|
2520
|
+
if state == Batch.State.FAILED.name: # type: ignore
|
|
2636
2521
|
raise AirflowException(
|
|
2637
|
-
f"Batch job {batch_id} failed with error: {state_message}
|
|
2522
|
+
f"Batch job {batch_id} failed with error: {state_message}.\nDriver logs: {link}"
|
|
2638
2523
|
)
|
|
2639
|
-
if state in (Batch.State.CANCELLED, Batch.State.CANCELLING):
|
|
2640
|
-
raise AirflowException(f"Batch job {batch_id} was cancelled
|
|
2641
|
-
if state == Batch.State.STATE_UNSPECIFIED:
|
|
2642
|
-
raise AirflowException(f"Batch job {batch_id} unspecified
|
|
2643
|
-
self.log.info("Batch job %s completed
|
|
2524
|
+
if state in (Batch.State.CANCELLED.name, Batch.State.CANCELLING.name): # type: ignore
|
|
2525
|
+
raise AirflowException(f"Batch job {batch_id} was cancelled.\nDriver logs: {link}")
|
|
2526
|
+
if state == Batch.State.STATE_UNSPECIFIED.name: # type: ignore
|
|
2527
|
+
raise AirflowException(f"Batch job {batch_id} unspecified.\nDriver logs: {link}")
|
|
2528
|
+
self.log.info("Batch job %s completed.\nDriver logs: %s", batch_id, link)
|
|
2644
2529
|
|
|
2645
2530
|
def retry_batch_creation(
|
|
2646
2531
|
self,
|
|
@@ -2708,6 +2593,31 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2708
2593
|
exc_info=e,
|
|
2709
2594
|
)
|
|
2710
2595
|
|
|
2596
|
+
def __update_batch_labels(self):
|
|
2597
|
+
dag_id = re.sub(r"[.\s]", "_", self.dag_id.lower())
|
|
2598
|
+
task_id = re.sub(r"[.\s]", "_", self.task_id.lower())
|
|
2599
|
+
|
|
2600
|
+
labels_regex = re.compile(r"^[a-z][\w-]{0,62}$")
|
|
2601
|
+
if not labels_regex.match(dag_id) or not labels_regex.match(task_id):
|
|
2602
|
+
return
|
|
2603
|
+
|
|
2604
|
+
labels_limit = 32
|
|
2605
|
+
new_labels = {"airflow-dag-id": dag_id, "airflow-task-id": task_id}
|
|
2606
|
+
|
|
2607
|
+
if self._dag:
|
|
2608
|
+
dag_display_name = re.sub(r"[.\s]", "_", self._dag.dag_display_name.lower())
|
|
2609
|
+
if labels_regex.match(dag_id):
|
|
2610
|
+
new_labels["airflow-dag-display-name"] = dag_display_name
|
|
2611
|
+
|
|
2612
|
+
if isinstance(self.batch, Batch):
|
|
2613
|
+
if len(self.batch.labels) + len(new_labels) <= labels_limit:
|
|
2614
|
+
self.batch.labels.update(new_labels)
|
|
2615
|
+
elif "labels" not in self.batch:
|
|
2616
|
+
self.batch["labels"] = new_labels
|
|
2617
|
+
elif isinstance(self.batch.get("labels"), dict):
|
|
2618
|
+
if len(self.batch["labels"]) + len(new_labels) <= labels_limit:
|
|
2619
|
+
self.batch["labels"].update(new_labels)
|
|
2620
|
+
|
|
2711
2621
|
|
|
2712
2622
|
class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
|
|
2713
2623
|
"""
|
|
@@ -2734,7 +2644,13 @@ class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
|
|
|
2734
2644
|
account from the list granting this role to the originating account (templated).
|
|
2735
2645
|
"""
|
|
2736
2646
|
|
|
2737
|
-
template_fields: Sequence[str] = (
|
|
2647
|
+
template_fields: Sequence[str] = (
|
|
2648
|
+
"batch_id",
|
|
2649
|
+
"region",
|
|
2650
|
+
"project_id",
|
|
2651
|
+
"gcp_conn_id",
|
|
2652
|
+
"impersonation_chain",
|
|
2653
|
+
)
|
|
2738
2654
|
|
|
2739
2655
|
def __init__(
|
|
2740
2656
|
self,
|
|
@@ -2798,7 +2714,13 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
|
|
|
2798
2714
|
account from the list granting this role to the originating account (templated).
|
|
2799
2715
|
"""
|
|
2800
2716
|
|
|
2801
|
-
template_fields: Sequence[str] = (
|
|
2717
|
+
template_fields: Sequence[str] = (
|
|
2718
|
+
"batch_id",
|
|
2719
|
+
"region",
|
|
2720
|
+
"project_id",
|
|
2721
|
+
"gcp_conn_id",
|
|
2722
|
+
"impersonation_chain",
|
|
2723
|
+
)
|
|
2802
2724
|
operator_extra_links = (DataprocBatchLink(),)
|
|
2803
2725
|
|
|
2804
2726
|
def __init__(
|
|
@@ -2839,7 +2761,6 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
|
|
|
2839
2761
|
if project_id:
|
|
2840
2762
|
DataprocBatchLink.persist(
|
|
2841
2763
|
context=context,
|
|
2842
|
-
operator=self,
|
|
2843
2764
|
project_id=project_id,
|
|
2844
2765
|
region=self.region,
|
|
2845
2766
|
batch_id=self.batch_id,
|
|
@@ -2875,7 +2796,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
|
|
|
2875
2796
|
:param order_by: How to order results as specified in ListBatchesRequest
|
|
2876
2797
|
"""
|
|
2877
2798
|
|
|
2878
|
-
template_fields: Sequence[str] = ("region", "project_id", "impersonation_chain")
|
|
2799
|
+
template_fields: Sequence[str] = ("region", "project_id", "gcp_conn_id", "impersonation_chain")
|
|
2879
2800
|
operator_extra_links = (DataprocBatchesListLink(),)
|
|
2880
2801
|
|
|
2881
2802
|
def __init__(
|
|
@@ -2922,7 +2843,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
|
|
|
2922
2843
|
)
|
|
2923
2844
|
project_id = self.project_id or hook.project_id
|
|
2924
2845
|
if project_id:
|
|
2925
|
-
DataprocBatchesListLink.persist(context=context,
|
|
2846
|
+
DataprocBatchesListLink.persist(context=context, project_id=project_id)
|
|
2926
2847
|
return [Batch.to_dict(result) for result in results]
|
|
2927
2848
|
|
|
2928
2849
|
|
|
@@ -2949,7 +2870,13 @@ class DataprocCancelOperationOperator(GoogleCloudBaseOperator):
|
|
|
2949
2870
|
account from the list granting this role to the originating account (templated).
|
|
2950
2871
|
"""
|
|
2951
2872
|
|
|
2952
|
-
template_fields: Sequence[str] = (
|
|
2873
|
+
template_fields: Sequence[str] = (
|
|
2874
|
+
"operation_name",
|
|
2875
|
+
"region",
|
|
2876
|
+
"project_id",
|
|
2877
|
+
"gcp_conn_id",
|
|
2878
|
+
"impersonation_chain",
|
|
2879
|
+
)
|
|
2953
2880
|
|
|
2954
2881
|
def __init__(
|
|
2955
2882
|
self,
|