apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/ads/hooks/ads.py +39 -5
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/bigquery.py +166 -281
- airflow/providers/google/cloud/hooks/cloud_composer.py +287 -14
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_run.py +17 -9
- airflow/providers/google/cloud/hooks/cloud_sql.py +101 -22
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +27 -6
- airflow/providers/google/cloud/hooks/compute_ssh.py +5 -1
- airflow/providers/google/cloud/hooks/datacatalog.py +9 -1
- airflow/providers/google/cloud/hooks/dataflow.py +71 -94
- airflow/providers/google/cloud/hooks/datafusion.py +1 -1
- airflow/providers/google/cloud/hooks/dataplex.py +1 -1
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +72 -71
- airflow/providers/google/cloud/hooks/gcs.py +111 -14
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/looker.py +6 -1
- airflow/providers/google/cloud/hooks/mlengine.py +3 -2
- airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
- airflow/providers/google/cloud/hooks/spanner.py +73 -8
- airflow/providers/google/cloud/hooks/stackdriver.py +10 -8
- airflow/providers/google/cloud/hooks/translate.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -209
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +2 -2
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +27 -1
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +307 -7
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/vision.py +2 -2
- airflow/providers/google/cloud/hooks/workflows.py +1 -1
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -13
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -96
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -95
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
- airflow/providers/google/cloud/links/managed_kafka.py +0 -70
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +17 -9
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +10 -8
- airflow/providers/google/cloud/openlineage/utils.py +15 -1
- airflow/providers/google/cloud/operators/alloy_db.py +70 -55
- airflow/providers/google/cloud/operators/bigquery.py +73 -636
- airflow/providers/google/cloud/operators/bigquery_dts.py +3 -5
- airflow/providers/google/cloud/operators/bigtable.py +36 -7
- airflow/providers/google/cloud/operators/cloud_base.py +21 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +2 -2
- airflow/providers/google/cloud/operators/cloud_build.py +75 -32
- airflow/providers/google/cloud/operators/cloud_composer.py +128 -40
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
- airflow/providers/google/cloud/operators/cloud_run.py +23 -5
- airflow/providers/google/cloud/operators/cloud_sql.py +8 -16
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -11
- airflow/providers/google/cloud/operators/compute.py +8 -40
- airflow/providers/google/cloud/operators/datacatalog.py +157 -21
- airflow/providers/google/cloud/operators/dataflow.py +38 -15
- airflow/providers/google/cloud/operators/dataform.py +15 -5
- airflow/providers/google/cloud/operators/datafusion.py +41 -20
- airflow/providers/google/cloud/operators/dataplex.py +193 -109
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +78 -35
- airflow/providers/google/cloud/operators/dataproc_metastore.py +96 -88
- airflow/providers/google/cloud/operators/datastore.py +22 -6
- airflow/providers/google/cloud/operators/dlp.py +6 -29
- airflow/providers/google/cloud/operators/functions.py +16 -7
- airflow/providers/google/cloud/operators/gcs.py +10 -8
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +60 -99
- airflow/providers/google/cloud/operators/looker.py +1 -1
- airflow/providers/google/cloud/operators/managed_kafka.py +107 -52
- airflow/providers/google/cloud/operators/natural_language.py +1 -1
- airflow/providers/google/cloud/operators/pubsub.py +60 -14
- airflow/providers/google/cloud/operators/spanner.py +25 -12
- airflow/providers/google/cloud/operators/speech_to_text.py +1 -2
- airflow/providers/google/cloud/operators/stackdriver.py +1 -9
- airflow/providers/google/cloud/operators/tasks.py +1 -12
- airflow/providers/google/cloud/operators/text_to_speech.py +1 -2
- airflow/providers/google/cloud/operators/translate.py +40 -16
- airflow/providers/google/cloud/operators/translate_speech.py +1 -2
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +29 -9
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +54 -26
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +11 -9
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +30 -7
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
- airflow/providers/google/cloud/operators/vision.py +2 -2
- airflow/providers/google/cloud/operators/workflows.py +18 -15
- airflow/providers/google/cloud/sensors/bigquery.py +2 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -2
- airflow/providers/google/cloud/sensors/bigtable.py +11 -4
- airflow/providers/google/cloud/sensors/cloud_composer.py +533 -29
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -2
- airflow/providers/google/cloud/sensors/dataflow.py +26 -9
- airflow/providers/google/cloud/sensors/dataform.py +2 -2
- airflow/providers/google/cloud/sensors/datafusion.py +4 -4
- airflow/providers/google/cloud/sensors/dataplex.py +2 -2
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -2
- airflow/providers/google/cloud/sensors/gcs.py +4 -4
- airflow/providers/google/cloud/sensors/looker.py +2 -2
- airflow/providers/google/cloud/sensors/pubsub.py +4 -4
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
- airflow/providers/google/cloud/sensors/workflows.py +2 -2
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +20 -12
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +13 -4
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +75 -34
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +302 -46
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +91 -1
- airflow/providers/google/cloud/triggers/dataflow.py +122 -0
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +14 -2
- airflow/providers/google/cloud/triggers/dataproc.py +122 -52
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +45 -27
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +15 -19
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +1 -2
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +27 -8
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +3 -4
- airflow/providers/google/firebase/operators/firestore.py +2 -2
- airflow/providers/google/get_provider_info.py +56 -52
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +26 -1
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +1 -2
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +3 -63
- airflow/providers/google/suite/hooks/calendar.py +1 -1
- airflow/providers/google/suite/hooks/sheets.py +15 -1
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +92 -48
- apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
- apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/hooks/automl.py +0 -673
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1362
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -112
- apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
|
@@ -72,7 +72,7 @@ if TYPE_CHECKING:
|
|
|
72
72
|
from google.protobuf.field_mask_pb2 import FieldMask
|
|
73
73
|
from google.type.interval_pb2 import Interval
|
|
74
74
|
|
|
75
|
-
from airflow.
|
|
75
|
+
from airflow.providers.common.compat.sdk import Context
|
|
76
76
|
|
|
77
77
|
|
|
78
78
|
class PreemptibilityType(Enum):
|
|
@@ -213,6 +213,7 @@ class ClusterGenerator:
|
|
|
213
213
|
:param secondary_worker_accelerator_type: Type of the accelerator card (GPU) to attach to the secondary workers,
|
|
214
214
|
see https://cloud.google.com/dataproc/docs/reference/rest/v1/InstanceGroupConfig#acceleratorconfig
|
|
215
215
|
:param secondary_worker_accelerator_count: Number of accelerator cards (GPUs) to attach to the secondary workers
|
|
216
|
+
:param cluster_tier: The tier of the cluster (e.g. "CLUSTER_TIER_STANDARD" / "CLUSTER_TIER_PREMIUM").
|
|
216
217
|
"""
|
|
217
218
|
|
|
218
219
|
def __init__(
|
|
@@ -261,6 +262,8 @@ class ClusterGenerator:
|
|
|
261
262
|
secondary_worker_instance_flexibility_policy: InstanceFlexibilityPolicy | None = None,
|
|
262
263
|
secondary_worker_accelerator_type: str | None = None,
|
|
263
264
|
secondary_worker_accelerator_count: int | None = None,
|
|
265
|
+
*,
|
|
266
|
+
cluster_tier: str | None = None,
|
|
264
267
|
**kwargs,
|
|
265
268
|
) -> None:
|
|
266
269
|
self.project_id = project_id
|
|
@@ -308,6 +311,7 @@ class ClusterGenerator:
|
|
|
308
311
|
self.secondary_worker_instance_flexibility_policy = secondary_worker_instance_flexibility_policy
|
|
309
312
|
self.secondary_worker_accelerator_type = secondary_worker_accelerator_type
|
|
310
313
|
self.secondary_worker_accelerator_count = secondary_worker_accelerator_count
|
|
314
|
+
self.cluster_tier = cluster_tier
|
|
311
315
|
|
|
312
316
|
if self.custom_image and self.image_version:
|
|
313
317
|
raise ValueError("The custom_image and image_version can't be both set")
|
|
@@ -513,6 +517,9 @@ class ClusterGenerator:
|
|
|
513
517
|
if self.driver_pool_size > 0:
|
|
514
518
|
cluster_data["auxiliary_node_groups"] = [self._build_driver_pool()]
|
|
515
519
|
|
|
520
|
+
if self.cluster_tier:
|
|
521
|
+
cluster_data["cluster_tier"] = self.cluster_tier
|
|
522
|
+
|
|
516
523
|
cluster_data = self._build_gce_cluster_config(cluster_data)
|
|
517
524
|
|
|
518
525
|
if self.single_node:
|
|
@@ -621,6 +628,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
|
|
|
621
628
|
"virtual_cluster_config",
|
|
622
629
|
"cluster_name",
|
|
623
630
|
"labels",
|
|
631
|
+
"gcp_conn_id",
|
|
624
632
|
"impersonation_chain",
|
|
625
633
|
)
|
|
626
634
|
template_fields_renderers = {"cluster_config": "json", "virtual_cluster_config": "json"}
|
|
@@ -807,7 +815,6 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
|
|
|
807
815
|
if project_id:
|
|
808
816
|
DataprocClusterLink.persist(
|
|
809
817
|
context=context,
|
|
810
|
-
operator=self,
|
|
811
818
|
cluster_id=self.cluster_name,
|
|
812
819
|
project_id=project_id,
|
|
813
820
|
region=self.region,
|
|
@@ -908,7 +915,7 @@ class DataprocCreateClusterOperator(GoogleCloudBaseOperator):
|
|
|
908
915
|
cluster_state = event["cluster_state"]
|
|
909
916
|
cluster_name = event["cluster_name"]
|
|
910
917
|
|
|
911
|
-
if cluster_state == ClusterStatus.State.
|
|
918
|
+
if cluster_state == ClusterStatus.State(ClusterStatus.State.DELETING).name:
|
|
912
919
|
raise AirflowException(f"Cluster is in ERROR state:\n{cluster_name}")
|
|
913
920
|
|
|
914
921
|
self.log.info("%s completed successfully.", self.task_id)
|
|
@@ -945,7 +952,13 @@ class DataprocDeleteClusterOperator(GoogleCloudBaseOperator):
|
|
|
945
952
|
:param polling_interval_seconds: Time (seconds) to wait between calls to check the cluster status.
|
|
946
953
|
"""
|
|
947
954
|
|
|
948
|
-
template_fields: Sequence[str] = (
|
|
955
|
+
template_fields: Sequence[str] = (
|
|
956
|
+
"project_id",
|
|
957
|
+
"region",
|
|
958
|
+
"cluster_name",
|
|
959
|
+
"gcp_conn_id",
|
|
960
|
+
"impersonation_chain",
|
|
961
|
+
)
|
|
949
962
|
|
|
950
963
|
def __init__(
|
|
951
964
|
self,
|
|
@@ -1071,6 +1084,7 @@ class _DataprocStartStopClusterBaseOperator(GoogleCloudBaseOperator):
|
|
|
1071
1084
|
"region",
|
|
1072
1085
|
"project_id",
|
|
1073
1086
|
"request_id",
|
|
1087
|
+
"gcp_conn_id",
|
|
1074
1088
|
"impersonation_chain",
|
|
1075
1089
|
)
|
|
1076
1090
|
|
|
@@ -1174,7 +1188,6 @@ class DataprocStartClusterOperator(_DataprocStartStopClusterBaseOperator):
|
|
|
1174
1188
|
cluster = super().execute(context)
|
|
1175
1189
|
DataprocClusterLink.persist(
|
|
1176
1190
|
context=context,
|
|
1177
|
-
operator=self,
|
|
1178
1191
|
cluster_id=self.cluster_name,
|
|
1179
1192
|
project_id=self._get_project_id(),
|
|
1180
1193
|
region=self.region,
|
|
@@ -1355,7 +1368,11 @@ class DataprocJobBaseOperator(GoogleCloudBaseOperator):
|
|
|
1355
1368
|
self.log.info("Job %s submitted successfully.", job_id)
|
|
1356
1369
|
# Save data required for extra links no matter what the job status will be
|
|
1357
1370
|
DataprocLink.persist(
|
|
1358
|
-
context=context,
|
|
1371
|
+
context=context,
|
|
1372
|
+
url=DATAPROC_JOB_LINK_DEPRECATED,
|
|
1373
|
+
resource=job_id,
|
|
1374
|
+
region=self.region,
|
|
1375
|
+
project_id=self.project_id,
|
|
1359
1376
|
)
|
|
1360
1377
|
|
|
1361
1378
|
if self.deferrable:
|
|
@@ -1413,7 +1430,7 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
|
|
|
1413
1430
|
:param metadata: Additional metadata that is provided to the method.
|
|
1414
1431
|
"""
|
|
1415
1432
|
|
|
1416
|
-
template_fields: Sequence[str] = ("region", "template")
|
|
1433
|
+
template_fields: Sequence[str] = ("region", "template", "gcp_conn_id")
|
|
1417
1434
|
template_fields_renderers = {"template": "json"}
|
|
1418
1435
|
operator_extra_links = (DataprocWorkflowTemplateLink(),)
|
|
1419
1436
|
|
|
@@ -1459,7 +1476,6 @@ class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator):
|
|
|
1459
1476
|
if project_id:
|
|
1460
1477
|
DataprocWorkflowTemplateLink.persist(
|
|
1461
1478
|
context=context,
|
|
1462
|
-
operator=self,
|
|
1463
1479
|
workflow_template_id=self.template["id"],
|
|
1464
1480
|
region=self.region,
|
|
1465
1481
|
project_id=project_id,
|
|
@@ -1508,7 +1524,13 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
|
|
|
1508
1524
|
:param cancel_on_kill: Flag which indicates whether cancel the workflow, when on_kill is called
|
|
1509
1525
|
"""
|
|
1510
1526
|
|
|
1511
|
-
template_fields: Sequence[str] = (
|
|
1527
|
+
template_fields: Sequence[str] = (
|
|
1528
|
+
"template_id",
|
|
1529
|
+
"gcp_conn_id",
|
|
1530
|
+
"impersonation_chain",
|
|
1531
|
+
"request_id",
|
|
1532
|
+
"parameters",
|
|
1533
|
+
)
|
|
1512
1534
|
template_fields_renderers = {"parameters": "json"}
|
|
1513
1535
|
operator_extra_links = (DataprocWorkflowLink(),)
|
|
1514
1536
|
|
|
@@ -1571,7 +1593,6 @@ class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator):
|
|
|
1571
1593
|
if project_id:
|
|
1572
1594
|
DataprocWorkflowLink.persist(
|
|
1573
1595
|
context=context,
|
|
1574
|
-
operator=self,
|
|
1575
1596
|
workflow_id=workflow_id,
|
|
1576
1597
|
region=self.region,
|
|
1577
1598
|
project_id=project_id,
|
|
@@ -1657,7 +1678,7 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
|
|
|
1657
1678
|
:param cancel_on_kill: Flag which indicates whether cancel the workflow, when on_kill is called
|
|
1658
1679
|
"""
|
|
1659
1680
|
|
|
1660
|
-
template_fields: Sequence[str] = ("template", "impersonation_chain")
|
|
1681
|
+
template_fields: Sequence[str] = ("template", "gcp_conn_id", "impersonation_chain")
|
|
1661
1682
|
template_fields_renderers = {"template": "json"}
|
|
1662
1683
|
operator_extra_links = (DataprocWorkflowLink(),)
|
|
1663
1684
|
|
|
@@ -1727,7 +1748,6 @@ class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator)
|
|
|
1727
1748
|
if project_id:
|
|
1728
1749
|
DataprocWorkflowLink.persist(
|
|
1729
1750
|
context=context,
|
|
1730
|
-
operator=self,
|
|
1731
1751
|
workflow_id=workflow_id,
|
|
1732
1752
|
region=self.region,
|
|
1733
1753
|
project_id=project_id,
|
|
@@ -1826,7 +1846,14 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
|
1826
1846
|
:param wait_timeout: How many seconds wait for job to be ready. Used only if ``asynchronous`` is False
|
|
1827
1847
|
"""
|
|
1828
1848
|
|
|
1829
|
-
template_fields: Sequence[str] = (
|
|
1849
|
+
template_fields: Sequence[str] = (
|
|
1850
|
+
"project_id",
|
|
1851
|
+
"region",
|
|
1852
|
+
"job",
|
|
1853
|
+
"gcp_conn_id",
|
|
1854
|
+
"impersonation_chain",
|
|
1855
|
+
"request_id",
|
|
1856
|
+
)
|
|
1830
1857
|
template_fields_renderers = {"job": "json"}
|
|
1831
1858
|
|
|
1832
1859
|
operator_extra_links = (DataprocJobLink(),)
|
|
@@ -1901,7 +1928,6 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
|
1901
1928
|
if project_id:
|
|
1902
1929
|
DataprocJobLink.persist(
|
|
1903
1930
|
context=context,
|
|
1904
|
-
operator=self,
|
|
1905
1931
|
job_id=new_job_id,
|
|
1906
1932
|
region=self.region,
|
|
1907
1933
|
project_id=project_id,
|
|
@@ -1947,9 +1973,9 @@ class DataprocSubmitJobOperator(GoogleCloudBaseOperator):
|
|
|
1947
1973
|
job_state = event["job_state"]
|
|
1948
1974
|
job_id = event["job_id"]
|
|
1949
1975
|
job = event["job"]
|
|
1950
|
-
if job_state == JobStatus.State.ERROR:
|
|
1976
|
+
if job_state == JobStatus.State.ERROR.name: # type: ignore
|
|
1951
1977
|
raise AirflowException(f"Job {job_id} failed:\n{job}")
|
|
1952
|
-
if job_state == JobStatus.State.CANCELLED:
|
|
1978
|
+
if job_state == JobStatus.State.CANCELLED.name: # type: ignore
|
|
1953
1979
|
raise AirflowException(f"Job {job_id} was cancelled:\n{job}")
|
|
1954
1980
|
self.log.info("%s completed successfully.", self.task_id)
|
|
1955
1981
|
return job_id
|
|
@@ -2026,6 +2052,7 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
|
|
|
2026
2052
|
"region",
|
|
2027
2053
|
"request_id",
|
|
2028
2054
|
"project_id",
|
|
2055
|
+
"gcp_conn_id",
|
|
2029
2056
|
"impersonation_chain",
|
|
2030
2057
|
)
|
|
2031
2058
|
operator_extra_links = (DataprocClusterLink(),)
|
|
@@ -2074,7 +2101,6 @@ class DataprocUpdateClusterOperator(GoogleCloudBaseOperator):
|
|
|
2074
2101
|
if project_id:
|
|
2075
2102
|
DataprocClusterLink.persist(
|
|
2076
2103
|
context=context,
|
|
2077
|
-
operator=self,
|
|
2078
2104
|
cluster_id=self.cluster_name,
|
|
2079
2105
|
project_id=project_id,
|
|
2080
2106
|
region=self.region,
|
|
@@ -2162,6 +2188,7 @@ class DataprocDiagnoseClusterOperator(GoogleCloudBaseOperator):
|
|
|
2162
2188
|
"project_id",
|
|
2163
2189
|
"region",
|
|
2164
2190
|
"cluster_name",
|
|
2191
|
+
"gcp_conn_id",
|
|
2165
2192
|
"impersonation_chain",
|
|
2166
2193
|
"tarball_gcs_dir",
|
|
2167
2194
|
"diagnosis_interval",
|
|
@@ -2308,6 +2335,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2308
2335
|
"batch",
|
|
2309
2336
|
"batch_id",
|
|
2310
2337
|
"region",
|
|
2338
|
+
"gcp_conn_id",
|
|
2311
2339
|
"impersonation_chain",
|
|
2312
2340
|
)
|
|
2313
2341
|
operator_extra_links = (DataprocBatchLink(),)
|
|
@@ -2373,7 +2401,6 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2373
2401
|
# Persist the link earlier so users can observe the progress
|
|
2374
2402
|
DataprocBatchLink.persist(
|
|
2375
2403
|
context=context,
|
|
2376
|
-
operator=self,
|
|
2377
2404
|
project_id=self.project_id,
|
|
2378
2405
|
region=self.region,
|
|
2379
2406
|
batch_id=self.batch_id,
|
|
@@ -2410,7 +2437,6 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2410
2437
|
|
|
2411
2438
|
DataprocBatchLink.persist(
|
|
2412
2439
|
context=context,
|
|
2413
|
-
operator=self,
|
|
2414
2440
|
project_id=self.project_id,
|
|
2415
2441
|
region=self.region,
|
|
2416
2442
|
batch_id=batch_id,
|
|
@@ -2460,7 +2486,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2460
2486
|
if not self.hook.check_error_for_resource_is_not_ready_msg(batch.state_message):
|
|
2461
2487
|
break
|
|
2462
2488
|
|
|
2463
|
-
self.handle_batch_status(context, batch.state, batch_id, batch.state_message)
|
|
2489
|
+
self.handle_batch_status(context, batch.state.name, batch_id, batch.state_message)
|
|
2464
2490
|
return Batch.to_dict(batch)
|
|
2465
2491
|
|
|
2466
2492
|
@cached_property
|
|
@@ -2485,21 +2511,21 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2485
2511
|
self.operation.cancel()
|
|
2486
2512
|
|
|
2487
2513
|
def handle_batch_status(
|
|
2488
|
-
self, context: Context, state:
|
|
2514
|
+
self, context: Context, state: str, batch_id: str, state_message: str | None = None
|
|
2489
2515
|
) -> None:
|
|
2490
2516
|
# The existing batch may be a number of states other than 'SUCCEEDED'\
|
|
2491
2517
|
# wait_for_operation doesn't fail if the job is cancelled, so we will check for it here which also
|
|
2492
2518
|
# finds a cancelling|canceled|unspecified job from wait_for_batch or the deferred trigger
|
|
2493
2519
|
link = DATAPROC_BATCH_LINK.format(region=self.region, project_id=self.project_id, batch_id=batch_id)
|
|
2494
|
-
if state == Batch.State.FAILED:
|
|
2520
|
+
if state == Batch.State.FAILED.name: # type: ignore
|
|
2495
2521
|
raise AirflowException(
|
|
2496
|
-
f"Batch job {batch_id} failed with error: {state_message}
|
|
2522
|
+
f"Batch job {batch_id} failed with error: {state_message}.\nDriver logs: {link}"
|
|
2497
2523
|
)
|
|
2498
|
-
if state in (Batch.State.CANCELLED, Batch.State.CANCELLING):
|
|
2499
|
-
raise AirflowException(f"Batch job {batch_id} was cancelled
|
|
2500
|
-
if state == Batch.State.STATE_UNSPECIFIED:
|
|
2501
|
-
raise AirflowException(f"Batch job {batch_id} unspecified
|
|
2502
|
-
self.log.info("Batch job %s completed
|
|
2524
|
+
if state in (Batch.State.CANCELLED.name, Batch.State.CANCELLING.name): # type: ignore
|
|
2525
|
+
raise AirflowException(f"Batch job {batch_id} was cancelled.\nDriver logs: {link}")
|
|
2526
|
+
if state == Batch.State.STATE_UNSPECIFIED.name: # type: ignore
|
|
2527
|
+
raise AirflowException(f"Batch job {batch_id} unspecified.\nDriver logs: {link}")
|
|
2528
|
+
self.log.info("Batch job %s completed.\nDriver logs: %s", batch_id, link)
|
|
2503
2529
|
|
|
2504
2530
|
def retry_batch_creation(
|
|
2505
2531
|
self,
|
|
@@ -2571,7 +2597,7 @@ class DataprocCreateBatchOperator(GoogleCloudBaseOperator):
|
|
|
2571
2597
|
dag_id = re.sub(r"[.\s]", "_", self.dag_id.lower())
|
|
2572
2598
|
task_id = re.sub(r"[.\s]", "_", self.task_id.lower())
|
|
2573
2599
|
|
|
2574
|
-
labels_regex = re.compile(r"^[a-z][\w-]{0,
|
|
2600
|
+
labels_regex = re.compile(r"^[a-z][\w-]{0,62}$")
|
|
2575
2601
|
if not labels_regex.match(dag_id) or not labels_regex.match(task_id):
|
|
2576
2602
|
return
|
|
2577
2603
|
|
|
@@ -2618,7 +2644,13 @@ class DataprocDeleteBatchOperator(GoogleCloudBaseOperator):
|
|
|
2618
2644
|
account from the list granting this role to the originating account (templated).
|
|
2619
2645
|
"""
|
|
2620
2646
|
|
|
2621
|
-
template_fields: Sequence[str] = (
|
|
2647
|
+
template_fields: Sequence[str] = (
|
|
2648
|
+
"batch_id",
|
|
2649
|
+
"region",
|
|
2650
|
+
"project_id",
|
|
2651
|
+
"gcp_conn_id",
|
|
2652
|
+
"impersonation_chain",
|
|
2653
|
+
)
|
|
2622
2654
|
|
|
2623
2655
|
def __init__(
|
|
2624
2656
|
self,
|
|
@@ -2682,7 +2714,13 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
|
|
|
2682
2714
|
account from the list granting this role to the originating account (templated).
|
|
2683
2715
|
"""
|
|
2684
2716
|
|
|
2685
|
-
template_fields: Sequence[str] = (
|
|
2717
|
+
template_fields: Sequence[str] = (
|
|
2718
|
+
"batch_id",
|
|
2719
|
+
"region",
|
|
2720
|
+
"project_id",
|
|
2721
|
+
"gcp_conn_id",
|
|
2722
|
+
"impersonation_chain",
|
|
2723
|
+
)
|
|
2686
2724
|
operator_extra_links = (DataprocBatchLink(),)
|
|
2687
2725
|
|
|
2688
2726
|
def __init__(
|
|
@@ -2723,7 +2761,6 @@ class DataprocGetBatchOperator(GoogleCloudBaseOperator):
|
|
|
2723
2761
|
if project_id:
|
|
2724
2762
|
DataprocBatchLink.persist(
|
|
2725
2763
|
context=context,
|
|
2726
|
-
operator=self,
|
|
2727
2764
|
project_id=project_id,
|
|
2728
2765
|
region=self.region,
|
|
2729
2766
|
batch_id=self.batch_id,
|
|
@@ -2759,7 +2796,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
|
|
|
2759
2796
|
:param order_by: How to order results as specified in ListBatchesRequest
|
|
2760
2797
|
"""
|
|
2761
2798
|
|
|
2762
|
-
template_fields: Sequence[str] = ("region", "project_id", "impersonation_chain")
|
|
2799
|
+
template_fields: Sequence[str] = ("region", "project_id", "gcp_conn_id", "impersonation_chain")
|
|
2763
2800
|
operator_extra_links = (DataprocBatchesListLink(),)
|
|
2764
2801
|
|
|
2765
2802
|
def __init__(
|
|
@@ -2806,7 +2843,7 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator):
|
|
|
2806
2843
|
)
|
|
2807
2844
|
project_id = self.project_id or hook.project_id
|
|
2808
2845
|
if project_id:
|
|
2809
|
-
DataprocBatchesListLink.persist(context=context,
|
|
2846
|
+
DataprocBatchesListLink.persist(context=context, project_id=project_id)
|
|
2810
2847
|
return [Batch.to_dict(result) for result in results]
|
|
2811
2848
|
|
|
2812
2849
|
|
|
@@ -2833,7 +2870,13 @@ class DataprocCancelOperationOperator(GoogleCloudBaseOperator):
|
|
|
2833
2870
|
account from the list granting this role to the originating account (templated).
|
|
2834
2871
|
"""
|
|
2835
2872
|
|
|
2836
|
-
template_fields: Sequence[str] = (
|
|
2873
|
+
template_fields: Sequence[str] = (
|
|
2874
|
+
"operation_name",
|
|
2875
|
+
"region",
|
|
2876
|
+
"project_id",
|
|
2877
|
+
"gcp_conn_id",
|
|
2878
|
+
"impersonation_chain",
|
|
2879
|
+
)
|
|
2837
2880
|
|
|
2838
2881
|
def __init__(
|
|
2839
2882
|
self,
|