apache-airflow-providers-google 14.0.0__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- airflow/providers/google/3rd-party-licenses/LICENSES.txt +14 -0
- airflow/providers/google/3rd-party-licenses/NOTICE +5 -0
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/_vendor/__init__.py +0 -0
- airflow/providers/google/_vendor/json_merge_patch.py +91 -0
- airflow/providers/google/ads/hooks/ads.py +52 -43
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +3 -19
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/_internal_client/secret_manager_client.py +3 -2
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/alloy_db.py +2 -3
- airflow/providers/google/cloud/hooks/bigquery.py +195 -318
- airflow/providers/google/cloud/hooks/bigquery_dts.py +8 -8
- airflow/providers/google/cloud/hooks/bigtable.py +3 -2
- airflow/providers/google/cloud/hooks/cloud_batch.py +8 -9
- airflow/providers/google/cloud/hooks/cloud_build.py +6 -65
- airflow/providers/google/cloud/hooks/cloud_composer.py +292 -24
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +4 -3
- airflow/providers/google/cloud/hooks/cloud_run.py +20 -11
- airflow/providers/google/cloud/hooks/cloud_sql.py +136 -64
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +35 -15
- airflow/providers/google/cloud/hooks/compute.py +7 -6
- airflow/providers/google/cloud/hooks/compute_ssh.py +7 -4
- airflow/providers/google/cloud/hooks/datacatalog.py +12 -3
- airflow/providers/google/cloud/hooks/dataflow.py +87 -242
- airflow/providers/google/cloud/hooks/dataform.py +9 -14
- airflow/providers/google/cloud/hooks/datafusion.py +7 -9
- airflow/providers/google/cloud/hooks/dataplex.py +13 -12
- airflow/providers/google/cloud/hooks/dataprep.py +2 -2
- airflow/providers/google/cloud/hooks/dataproc.py +76 -74
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +4 -3
- airflow/providers/google/cloud/hooks/dlp.py +5 -4
- airflow/providers/google/cloud/hooks/gcs.py +144 -33
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kms.py +3 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +22 -17
- airflow/providers/google/cloud/hooks/looker.py +6 -1
- airflow/providers/google/cloud/hooks/managed_kafka.py +227 -3
- airflow/providers/google/cloud/hooks/mlengine.py +7 -8
- airflow/providers/google/cloud/hooks/natural_language.py +3 -2
- airflow/providers/google/cloud/hooks/os_login.py +3 -2
- airflow/providers/google/cloud/hooks/pubsub.py +6 -6
- airflow/providers/google/cloud/hooks/secret_manager.py +105 -12
- airflow/providers/google/cloud/hooks/spanner.py +75 -10
- airflow/providers/google/cloud/hooks/speech_to_text.py +3 -2
- airflow/providers/google/cloud/hooks/stackdriver.py +18 -18
- airflow/providers/google/cloud/hooks/tasks.py +4 -3
- airflow/providers/google/cloud/hooks/text_to_speech.py +3 -2
- airflow/providers/google/cloud/hooks/translate.py +8 -17
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +8 -222
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +9 -15
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +33 -283
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +5 -12
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +6 -12
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +311 -10
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +7 -13
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +8 -12
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +6 -12
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +3 -2
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/video_intelligence.py +3 -2
- airflow/providers/google/cloud/hooks/vision.py +7 -7
- airflow/providers/google/cloud/hooks/workflows.py +4 -3
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -7
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -46
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -90
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -89
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +11 -61
- airflow/providers/google/cloud/links/managed_kafka.py +11 -51
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +166 -118
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +14 -9
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +141 -40
- airflow/providers/google/cloud/openlineage/mixins.py +14 -13
- airflow/providers/google/cloud/openlineage/utils.py +19 -3
- airflow/providers/google/cloud/operators/alloy_db.py +76 -61
- airflow/providers/google/cloud/operators/bigquery.py +104 -667
- airflow/providers/google/cloud/operators/bigquery_dts.py +12 -12
- airflow/providers/google/cloud/operators/bigtable.py +38 -7
- airflow/providers/google/cloud/operators/cloud_base.py +22 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +18 -18
- airflow/providers/google/cloud/operators/cloud_build.py +80 -36
- airflow/providers/google/cloud/operators/cloud_composer.py +157 -71
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +74 -46
- airflow/providers/google/cloud/operators/cloud_run.py +39 -20
- airflow/providers/google/cloud/operators/cloud_sql.py +46 -61
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -14
- airflow/providers/google/cloud/operators/compute.py +18 -50
- airflow/providers/google/cloud/operators/datacatalog.py +167 -29
- airflow/providers/google/cloud/operators/dataflow.py +38 -15
- airflow/providers/google/cloud/operators/dataform.py +19 -7
- airflow/providers/google/cloud/operators/datafusion.py +43 -43
- airflow/providers/google/cloud/operators/dataplex.py +212 -126
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +134 -207
- airflow/providers/google/cloud/operators/dataproc_metastore.py +102 -84
- airflow/providers/google/cloud/operators/datastore.py +22 -6
- airflow/providers/google/cloud/operators/dlp.py +24 -45
- airflow/providers/google/cloud/operators/functions.py +21 -14
- airflow/providers/google/cloud/operators/gcs.py +15 -12
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +115 -106
- airflow/providers/google/cloud/operators/looker.py +1 -1
- airflow/providers/google/cloud/operators/managed_kafka.py +362 -40
- airflow/providers/google/cloud/operators/natural_language.py +5 -3
- airflow/providers/google/cloud/operators/pubsub.py +69 -21
- airflow/providers/google/cloud/operators/spanner.py +53 -45
- airflow/providers/google/cloud/operators/speech_to_text.py +5 -4
- airflow/providers/google/cloud/operators/stackdriver.py +5 -11
- airflow/providers/google/cloud/operators/tasks.py +6 -15
- airflow/providers/google/cloud/operators/text_to_speech.py +4 -3
- airflow/providers/google/cloud/operators/translate.py +46 -20
- airflow/providers/google/cloud/operators/translate_speech.py +4 -3
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +44 -34
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +34 -12
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +62 -53
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +75 -11
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +48 -12
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +16 -12
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +62 -14
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +35 -10
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +5 -3
- airflow/providers/google/cloud/operators/vision.py +7 -5
- airflow/providers/google/cloud/operators/workflows.py +24 -19
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery.py +2 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +6 -4
- airflow/providers/google/cloud/sensors/bigtable.py +14 -6
- airflow/providers/google/cloud/sensors/cloud_composer.py +535 -33
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +6 -5
- airflow/providers/google/cloud/sensors/dataflow.py +27 -10
- airflow/providers/google/cloud/sensors/dataform.py +2 -2
- airflow/providers/google/cloud/sensors/datafusion.py +4 -4
- airflow/providers/google/cloud/sensors/dataplex.py +7 -5
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +10 -9
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +4 -3
- airflow/providers/google/cloud/sensors/gcs.py +22 -21
- airflow/providers/google/cloud/sensors/looker.py +5 -5
- airflow/providers/google/cloud/sensors/pubsub.py +20 -20
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
- airflow/providers/google/cloud/sensors/workflows.py +6 -4
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +14 -13
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +18 -22
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +4 -5
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +45 -38
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +44 -12
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +36 -14
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +75 -34
- airflow/providers/google/cloud/triggers/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_batch.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_build.py +3 -2
- airflow/providers/google/cloud/triggers/cloud_composer.py +303 -47
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +96 -5
- airflow/providers/google/cloud/triggers/dataflow.py +125 -2
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +16 -3
- airflow/providers/google/cloud/triggers/dataproc.py +124 -53
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +46 -28
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +17 -20
- airflow/providers/google/cloud/triggers/vertex_ai.py +8 -7
- airflow/providers/google/cloud/utils/bigquery.py +5 -7
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +4 -3
- airflow/providers/google/cloud/utils/dataform.py +1 -1
- airflow/providers/google/cloud/utils/external_token_supplier.py +0 -1
- airflow/providers/google/cloud/utils/field_validator.py +1 -2
- airflow/providers/google/cloud/utils/validators.py +43 -0
- airflow/providers/google/common/auth_backend/google_openid.py +26 -9
- airflow/providers/google/common/consts.py +2 -1
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +40 -43
- airflow/providers/google/common/hooks/operation_helpers.py +78 -0
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +4 -5
- airflow/providers/google/firebase/operators/firestore.py +2 -2
- airflow/providers/google/get_provider_info.py +61 -216
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +30 -6
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +3 -2
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -1
- airflow/providers/google/marketing_platform/links/analytics_admin.py +4 -5
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +7 -6
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +4 -64
- airflow/providers/google/suite/hooks/calendar.py +1 -1
- airflow/providers/google/suite/hooks/drive.py +2 -2
- airflow/providers/google/suite/hooks/sheets.py +15 -1
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +117 -72
- apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +1 -1
- apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/example_dags/example_cloud_task.py +0 -54
- airflow/providers/google/cloud/hooks/automl.py +0 -679
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1360
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -1515
- airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
- apache_airflow_providers_google-14.0.0.dist-info/RECORD +0 -318
- /airflow/providers/google/cloud/{example_dags → bundles}/__init__.py +0 -0
- {apache_airflow_providers_google-14.0.0.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
airflow/providers/google/cloud/hooks/compute.py

@@ -23,13 +23,14 @@ import time
 from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any
 
+from google.cloud.compute_v1.services.instance_group_managers import InstanceGroupManagersClient
+from google.cloud.compute_v1.services.instance_templates import InstanceTemplatesClient
+from google.cloud.compute_v1.services.instances import InstancesClient
 from googleapiclient.discovery import build
 
 from airflow.exceptions import AirflowException
+from airflow.providers.google.common.consts import CLIENT_INFO
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
-from google.cloud.compute_v1.services.instance_group_managers import InstanceGroupManagersClient
-from google.cloud.compute_v1.services.instance_templates import InstanceTemplatesClient
-from google.cloud.compute_v1.services.instances import InstancesClient
 
 if TYPE_CHECKING:
     from google.api_core.retry import Retry
@@ -85,15 +86,15 @@ class ComputeEngineHook(GoogleBaseHook):
 
     def get_compute_instance_template_client(self):
         """Return Compute Engine Instance Template Client."""
-        return InstanceTemplatesClient(credentials=self.get_credentials(), client_info=
+        return InstanceTemplatesClient(credentials=self.get_credentials(), client_info=CLIENT_INFO)
 
     def get_compute_instance_client(self):
         """Return Compute Engine Instance Client."""
-        return InstancesClient(credentials=self.get_credentials(), client_info=
+        return InstancesClient(credentials=self.get_credentials(), client_info=CLIENT_INFO)
 
     def get_compute_instance_group_managers_client(self):
         """Return Compute Engine Instance Group Managers Client."""
-        return InstanceGroupManagersClient(credentials=self.get_credentials(), client_info=
+        return InstanceGroupManagersClient(credentials=self.get_credentials(), client_info=CLIENT_INFO)
 
     @GoogleBaseHook.fallback_to_default_project_id
     def insert_instance_template(
airflow/providers/google/cloud/hooks/compute_ssh.py

@@ -31,7 +31,11 @@ from airflow.providers.google.cloud.hooks.compute import ComputeEngineHook
 from airflow.providers.google.cloud.hooks.os_login import OSLoginHook
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.providers.ssh.hooks.ssh import SSHHook
-
+
+try:
+    from airflow.sdk.definitions._internal.types import NOTSET, ArgNotSet
+except ImportError:
+    from airflow.utils.types import NOTSET, ArgNotSet  # type: ignore[attr-defined,no-redef]
 
 # Paramiko should be imported after airflow.providers.ssh. Then the import will fail with
 # cannot import "airflow.providers.ssh" and will be correctly discovered as optional feature
@@ -148,8 +152,7 @@ class ComputeEngineSSHHook(SSHHook):
             return ComputeEngineHook(
                 gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
             )
-
-        return ComputeEngineHook(gcp_conn_id=self.gcp_conn_id)
+        return ComputeEngineHook(gcp_conn_id=self.gcp_conn_id)
 
     def _load_connection_config(self):
         def _boolify(value):
@@ -158,7 +161,7 @@ class ComputeEngineSSHHook(SSHHook):
             if isinstance(value, str):
                 if value.lower() == "false":
                     return False
-
+                if value.lower() == "true":
                     return True
             return False
 
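The `try`/`except` shim added above lets `compute_ssh.py` pull `NOTSET`/`ArgNotSet` from the Task SDK when it is available and fall back to `airflow.utils.types` otherwise. A minimal sketch of the sentinel pattern this enables, distinguishing an omitted argument from an explicit `None` (the `resolve_timeout` helper below is a hypothetical illustration, not provider code):

```python
from __future__ import annotations

# Same compat shim as in the diff above.
try:
    from airflow.sdk.definitions._internal.types import NOTSET, ArgNotSet
except ImportError:
    from airflow.utils.types import NOTSET, ArgNotSet  # type: ignore[attr-defined,no-redef]


def resolve_timeout(timeout: float | None | ArgNotSet = NOTSET) -> float | None:
    """Hypothetical helper: NOTSET means "use the default", None means "no timeout"."""
    if isinstance(timeout, ArgNotSet):
        return 60.0  # assumed default, for illustration only
    return timeout


assert resolve_timeout() == 60.0      # argument omitted -> default applies
assert resolve_timeout(None) is None  # explicit None -> timeout disabled
assert resolve_timeout(5.0) == 5.0    # explicit value passes through
```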
airflow/providers/google/cloud/hooks/datacatalog.py

@@ -19,9 +19,6 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from airflow.exceptions import AirflowException
-from airflow.providers.google.common.consts import CLIENT_INFO
-from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
 from google.cloud import datacatalog
 from google.cloud.datacatalog import (
@@ -35,11 +32,23 @@ from google.cloud.datacatalog import (
     TagTemplateField,
 )
 
+from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+from airflow.providers.google.common.consts import CLIENT_INFO
+from airflow.providers.google.common.deprecated import deprecated
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
+
 if TYPE_CHECKING:
     from google.api_core.retry import Retry
     from google.protobuf.field_mask_pb2 import FieldMask
 
 
+@deprecated(
+    planned_removal_date="January 30, 2026",
+    use_instead="airflow.providers.google.cloud.hooks.dataplex.DataplexHook",
+    reason="The Data Catalog will be discontinued on January 30, 2026 "
+    "in favor of Dataplex Universal Catalog.",
+    category=AirflowProviderDeprecationWarning,
+)
 class CloudDataCatalogHook(GoogleBaseHook):
     """
     Hook for Google Cloud Data Catalog Service.
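With the `@deprecated(...)` decorator applied, using `CloudDataCatalogHook` now surfaces an `AirflowProviderDeprecationWarning` steering users toward `DataplexHook`. A hedged sketch of what that looks like at call time (assuming, as the `deprecated` helper normally does for classes, that the warning fires on instantiation):

```python
import warnings

from airflow.exceptions import AirflowProviderDeprecationWarning
from airflow.providers.google.cloud.hooks.datacatalog import CloudDataCatalogHook

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    CloudDataCatalogHook()  # instantiation should trigger the deprecation warning

assert any(issubclass(w.category, AirflowProviderDeprecationWarning) for w in caught)
# The emitted message points at the documented replacement:
# airflow.providers.google.cloud.hooks.dataplex.DataplexHook
```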
airflow/providers/google/cloud/hooks/dataflow.py

@@ -27,22 +27,10 @@ import subprocess
 import time
 import uuid
 import warnings
-from collections.abc import Generator, Sequence
+from collections.abc import Callable, Generator, Sequence
 from copy import deepcopy
-from typing import TYPE_CHECKING, Any,
+from typing import TYPE_CHECKING, Any, TypeVar, cast
 
-from googleapiclient.discovery import Resource, build
-
-from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
-from airflow.providers.apache.beam.hooks.beam import BeamHook, BeamRunnerType, beam_options_to_args
-from airflow.providers.google.common.deprecated import deprecated
-from airflow.providers.google.common.hooks.base_google import (
-    PROVIDE_PROJECT_ID,
-    GoogleBaseAsyncHook,
-    GoogleBaseHook,
-)
-from airflow.utils.log.logging_mixin import LoggingMixin
-from airflow.utils.timeout import timeout
 from google.cloud.dataflow_v1beta3 import (
     GetJobRequest,
     Job,
@@ -59,6 +47,17 @@ from google.cloud.dataflow_v1beta3.types import (
     JobMetrics,
 )
 from google.cloud.dataflow_v1beta3.types.jobs import ListJobsRequest
+from googleapiclient.discovery import Resource, build
+
+from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+from airflow.providers.apache.beam.hooks.beam import BeamHook, BeamRunnerType, beam_options_to_args
+from airflow.providers.common.compat.sdk import timeout
+from airflow.providers.google.common.hooks.base_google import (
+    PROVIDE_PROJECT_ID,
+    GoogleBaseAsyncHook,
+    GoogleBaseHook,
+)
+from airflow.utils.log.logging_mixin import LoggingMixin
 
 if TYPE_CHECKING:
     from google.cloud.dataflow_v1beta3.services.jobs_v1_beta3.pagers import ListJobsAsyncPager
@@ -136,7 +135,7 @@ def _fallback_variable_parameter(parameter_name: str, variable_key_name: str) ->
 
             return func(self, *args, **kwargs)
 
-        return cast(T, inner_wrapper)
+        return cast("T", inner_wrapper)
 
     return _wrapper
 
@@ -185,7 +184,67 @@ class DataflowJobType:
     JOB_TYPE_STREAMING = "JOB_TYPE_STREAMING"
 
 
-class
+class DataflowJobTerminalStateHelper(LoggingMixin):
+    """Helper to define and validate the dataflow job terminal state."""
+
+    @staticmethod
+    def expected_terminal_state_is_allowed(expected_terminal_state):
+        job_allowed_terminal_states = DataflowJobStatus.TERMINAL_STATES | {
+            DataflowJobStatus.JOB_STATE_RUNNING
+        }
+        if expected_terminal_state not in job_allowed_terminal_states:
+            raise AirflowException(
+                f"Google Cloud Dataflow job's expected terminal state "
+                f"'{expected_terminal_state}' is invalid."
+                f" The value should be any of the following: {job_allowed_terminal_states}"
+            )
+        return True
+
+    @staticmethod
+    def expected_terminal_state_is_valid_for_job_type(expected_terminal_state, is_streaming: bool):
+        if is_streaming:
+            invalid_terminal_state = DataflowJobStatus.JOB_STATE_DONE
+            job_type = "streaming"
+        else:
+            invalid_terminal_state = DataflowJobStatus.JOB_STATE_DRAINED
+            job_type = "batch"
+
+        if expected_terminal_state == invalid_terminal_state:
+            raise AirflowException(
+                f"Google Cloud Dataflow job's expected terminal state cannot be {invalid_terminal_state} while it is a {job_type} job"
+            )
+        return True
+
+    def job_reached_terminal_state(self, job, wait_until_finished=None, custom_terminal_state=None) -> bool:
+        """
+        Check the job reached terminal state, if job failed raise exception.
+
+        :return: True if job is done.
+        :raise: Exception
+        """
+        current_state = job["currentState"]
+        is_streaming = job.get("type") == DataflowJobType.JOB_TYPE_STREAMING
+        expected_terminal_state = (
+            DataflowJobStatus.JOB_STATE_RUNNING if is_streaming else DataflowJobStatus.JOB_STATE_DONE
+        )
+        if custom_terminal_state is not None:
+            expected_terminal_state = custom_terminal_state
+        self.expected_terminal_state_is_allowed(expected_terminal_state)
+        self.expected_terminal_state_is_valid_for_job_type(expected_terminal_state, is_streaming=is_streaming)
+        if current_state == expected_terminal_state:
+            if expected_terminal_state == DataflowJobStatus.JOB_STATE_RUNNING and wait_until_finished:
+                return False
+            return True
+        if current_state in DataflowJobStatus.AWAITING_STATES:
+            return wait_until_finished is False
+        self.log.debug("Current job: %s", job)
+        raise AirflowException(
+            f"Google Cloud Dataflow job {job['name']} is in an unexpected terminal state: {current_state}, "
+            f"expected terminal state: {expected_terminal_state}"
+        )
+
+
+class _DataflowJobsController(DataflowJobTerminalStateHelper):
     """
     Interface for communication with Google Cloud Dataflow API.
 
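The new `DataflowJobTerminalStateHelper` centralizes the terminal-state logic that `_DataflowJobsController` (above) and `AsyncDataflowHook` (below) now inherit. A rough sketch of how it evaluates job payloads, using minimal dicts with the state strings referenced in the hunk (the payloads themselves are invented for illustration):

```python
from airflow.providers.google.cloud.hooks.dataflow import DataflowJobTerminalStateHelper

helper = DataflowJobTerminalStateHelper()

# Batch job: the default expected terminal state is JOB_STATE_DONE,
# so a finished batch job reports True.
batch_job = {"id": "job-1", "name": "example-batch", "currentState": "JOB_STATE_DONE"}
assert helper.job_reached_terminal_state(batch_job)

# Streaming job: JOB_STATE_RUNNING counts as its terminal state, but
# wait_until_finished=True tells the controller to keep polling instead.
streaming_job = {
    "id": "job-2",
    "name": "example-streaming",
    "currentState": "JOB_STATE_RUNNING",
    "type": "JOB_TYPE_STREAMING",
}
assert not helper.job_reached_terminal_state(streaming_job, wait_until_finished=True)
```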
@@ -262,15 +321,14 @@ class _DataflowJobsController(LoggingMixin):
         """
         if not self._multiple_jobs and self._job_id:
             return [self.fetch_job_by_id(self._job_id)]
-
+        if self._jobs:
             return [self.fetch_job_by_id(job["id"]) for job in self._jobs]
-
+        if self._job_name:
             jobs = self._fetch_jobs_by_prefix_name(self._job_name.lower())
             if len(jobs) == 1:
                 self._job_id = jobs[0]["id"]
             return jobs
-
-        raise ValueError("Missing both dataflow job ID and name.")
+        raise ValueError("Missing both dataflow job ID and name.")
 
     def fetch_job_by_id(self, job_id: str) -> dict[str, str]:
         """
@@ -435,12 +493,12 @@ class _DataflowJobsController(LoggingMixin):
                 f"'{current_expected_state}' is invalid."
                 f" The value should be any of the following: {terminal_states}"
             )
-
+        if is_streaming and current_expected_state == DataflowJobStatus.JOB_STATE_DONE:
             raise AirflowException(
                 "Google Cloud Dataflow job's expected terminal state cannot be "
                 "JOB_STATE_DONE while it is a streaming job"
             )
-
+        if not is_streaming and current_expected_state == DataflowJobStatus.JOB_STATE_DRAINED:
             raise AirflowException(
                 "Google Cloud Dataflow job's expected terminal state cannot be "
                 "JOB_STATE_DRAINED while it is a batch job"
@@ -463,7 +521,10 @@
         """Wait for result of submitted job."""
         self.log.info("Start waiting for done.")
         self._refresh_jobs()
-        while self._jobs and not all(
+        while self._jobs and not all(
+            self.job_reached_terminal_state(job, self._wait_until_finished, self._expected_terminal_state)
+            for job in self._jobs
+        ):
             self.log.info("Waiting for done. Sleep %s s", self._poll_sleep)
             time.sleep(self._poll_sleep)
             self._refresh_jobs()
@@ -586,66 +647,6 @@ class DataflowHook(GoogleBaseHook):
         http_authorized = self._authorize()
         return build("datapipelines", "v1", http=http_authorized, cache_discovery=False)
 
-    @_fallback_to_location_from_variables
-    @_fallback_to_project_id_from_variables
-    @GoogleBaseHook.fallback_to_default_project_id
-    @deprecated(
-        planned_removal_date="March 01, 2025",
-        use_instead="airflow.providers.apache.beam.hooks.beam.start.start_java_pipeline, "
-        "providers.google.cloud.hooks.dataflow.DataflowHook.wait_for_done",
-        instructions="Please use airflow.providers.apache.beam.hooks.beam.start.start_java_pipeline "
-        "to start pipeline and providers.google.cloud.hooks.dataflow.DataflowHook.wait_for_done method "
-        "to wait for the required pipeline state instead.",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def start_java_dataflow(
-        self,
-        job_name: str,
-        variables: dict,
-        jar: str,
-        project_id: str,
-        job_class: str | None = None,
-        append_job_name: bool = True,
-        multiple_jobs: bool = False,
-        on_new_job_id_callback: Callable[[str], None] | None = None,
-        location: str = DEFAULT_DATAFLOW_LOCATION,
-    ) -> None:
-        """
-        Start Dataflow java job.
-
-        :param job_name: The name of the job.
-        :param variables: Variables passed to the job.
-        :param project_id: Optional, the Google Cloud project ID in which to start a job.
-            If set to None or missing, the default project_id from the Google Cloud connection is used.
-        :param jar: Name of the jar for the job
-        :param job_class: Name of the java class for the job.
-        :param append_job_name: True if unique suffix has to be appended to job name.
-        :param multiple_jobs: True if to check for multiple job in dataflow
-        :param on_new_job_id_callback: Callback called when the job ID is known.
-        :param location: Job location.
-        """
-        name = self.build_dataflow_job_name(job_name, append_job_name)
-
-        variables["jobName"] = name
-        variables["region"] = location
-        variables["project"] = project_id
-
-        if "labels" in variables:
-            variables["labels"] = json.dumps(variables["labels"], separators=(",", ":"))
-
-        self.beam_hook.start_java_pipeline(
-            variables=variables,
-            jar=jar,
-            job_class=job_class,
-            process_line_callback=process_line_and_extract_dataflow_job_id_callback(on_new_job_id_callback),
-        )
-        self.wait_for_done(
-            job_name=name,
-            location=location,
-            job_id=self.job_id,
-            multiple_jobs=multiple_jobs,
-        )
-
     @_fallback_to_location_from_variables
     @_fallback_to_project_id_from_variables
     @GoogleBaseHook.fallback_to_default_project_id
@@ -1004,7 +1005,7 @@
         success_code = 0
 
         with self.provide_authorized_gcloud():
-            proc = subprocess.run(cmd, capture_output=True)
+            proc = subprocess.run(cmd, check=False, capture_output=True)
 
         if proc.returncode != success_code:
             stderr_last_20_lines = "\n".join(proc.stderr.decode().strip().splitlines()[-20:])
@@ -1027,82 +1028,6 @@
            "While reading job object after template execution error occurred. Job object has no id."
        )
 
-    @_fallback_to_location_from_variables
-    @_fallback_to_project_id_from_variables
-    @GoogleBaseHook.fallback_to_default_project_id
-    @deprecated(
-        planned_removal_date="March 01, 2025",
-        use_instead="airflow.providers.apache.beam.hooks.beam.start.start_python_pipeline method, "
-        "providers.google.cloud.hooks.dataflow.DataflowHook.wait_for_done",
-        instructions="Please use airflow.providers.apache.beam.hooks.beam.start.start_python_pipeline method "
-        "to start pipeline and providers.google.cloud.hooks.dataflow.DataflowHook.wait_for_done method "
-        "to wait for the required pipeline state instead.",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def start_python_dataflow(
-        self,
-        job_name: str,
-        variables: dict,
-        dataflow: str,
-        py_options: list[str],
-        project_id: str,
-        py_interpreter: str = "python3",
-        py_requirements: list[str] | None = None,
-        py_system_site_packages: bool = False,
-        append_job_name: bool = True,
-        on_new_job_id_callback: Callable[[str], None] | None = None,
-        location: str = DEFAULT_DATAFLOW_LOCATION,
-    ):
-        """
-        Start Dataflow job.
-
-        :param job_name: The name of the job.
-        :param variables: Variables passed to the job.
-        :param dataflow: Name of the Dataflow process.
-        :param py_options: Additional options.
-        :param project_id: The ID of the GCP project that owns the job.
-            If set to ``None`` or missing, the default project_id from the GCP connection is used.
-        :param py_interpreter: Python version of the beam pipeline.
-            If None, this defaults to the python3.
-            To track python versions supported by beam and related
-            issues check: https://issues.apache.org/jira/browse/BEAM-1251
-        :param py_requirements: Additional python package(s) to install.
-            If a value is passed to this parameter, a new virtual environment has been created with
-            additional packages installed.
-
-            You could also install the apache-beam package if it is not installed on your system or you want
-            to use a different version.
-        :param py_system_site_packages: Whether to include system_site_packages in your virtualenv.
-            See virtualenv documentation for more information.
-
-            This option is only relevant if the ``py_requirements`` parameter is not None.
-        :param append_job_name: True if unique suffix has to be appended to job name.
-        :param project_id: Optional, the Google Cloud project ID in which to start a job.
-            If set to None or missing, the default project_id from the Google Cloud connection is used.
-        :param on_new_job_id_callback: Callback called when the job ID is known.
-        :param location: Job location.
-        """
-        name = self.build_dataflow_job_name(job_name, append_job_name)
-        variables["job_name"] = name
-        variables["region"] = location
-        variables["project"] = project_id
-
-        self.beam_hook.start_python_pipeline(
-            variables=variables,
-            py_file=dataflow,
-            py_options=py_options,
-            py_interpreter=py_interpreter,
-            py_requirements=py_requirements,
-            py_system_site_packages=py_system_site_packages,
-            process_line_callback=process_line_and_extract_dataflow_job_id_callback(on_new_job_id_callback),
-        )
-
-        self.wait_for_done(
-            job_name=name,
-            location=location,
-            job_id=self.job_id,
-        )
-
     @staticmethod
     def build_dataflow_job_name(job_name: str, append_job_name: bool = True) -> str:
         """Build Dataflow job name."""
@@ -1200,85 +1125,6 @@
         )
         jobs_controller.cancel()
 
-    @GoogleBaseHook.fallback_to_default_project_id
-    def start_sql_job(
-        self,
-        job_name: str,
-        query: str,
-        options: dict[str, Any],
-        project_id: str,
-        location: str = DEFAULT_DATAFLOW_LOCATION,
-        on_new_job_id_callback: Callable[[str], None] | None = None,
-        on_new_job_callback: Callable[[dict], None] | None = None,
-    ):
-        """
-        Start Dataflow SQL query.
-
-        :param job_name: The unique name to assign to the Cloud Dataflow job.
-        :param query: The SQL query to execute.
-        :param options: Job parameters to be executed.
-            For more information, look at:
-            `https://cloud.google.com/sdk/gcloud/reference/beta/dataflow/sql/query
-            <gcloud beta dataflow sql query>`__
-            command reference
-        :param location: The location of the Dataflow job (for example europe-west1)
-        :param project_id: The ID of the GCP project that owns the job.
-            If set to ``None`` or missing, the default project_id from the GCP connection is used.
-        :param on_new_job_id_callback: (Deprecated) Callback called when the job ID is known.
-        :param on_new_job_callback: Callback called when the job is known.
-        :return: the new job object
-        """
-        gcp_options = {
-            "project": project_id,
-            "format": "value(job.id)",
-            "job-name": job_name,
-            "region": location,
-        }
-        cmd = self._build_gcloud_command(
-            command=["gcloud", "dataflow", "sql", "query", query], parameters={**gcp_options, **options}
-        )
-        self.log.info("Executing command: %s", " ".join(shlex.quote(c) for c in cmd))
-        with self.provide_authorized_gcloud():
-            proc = subprocess.run(cmd, capture_output=True)
-        self.log.info("Output: %s", proc.stdout.decode())
-        self.log.warning("Stderr: %s", proc.stderr.decode())
-        self.log.info("Exit code %d", proc.returncode)
-        stderr_last_20_lines = "\n".join(proc.stderr.decode().strip().splitlines()[-20:])
-        if proc.returncode != 0:
-            raise AirflowException(
-                f"Process exit with non-zero exit code. Exit code: {proc.returncode} Error Details : "
-                f"{stderr_last_20_lines}"
-            )
-        job_id = proc.stdout.decode().strip()
-
-        self.log.info("Created job ID: %s", job_id)
-
-        jobs_controller = _DataflowJobsController(
-            dataflow=self.get_conn(),
-            project_number=project_id,
-            job_id=job_id,
-            location=location,
-            poll_sleep=self.poll_sleep,
-            num_retries=self.num_retries,
-            drain_pipeline=self.drain_pipeline,
-            wait_until_finished=self.wait_until_finished,
-        )
-        job = jobs_controller.get_jobs(refresh=True)[0]
-
-        if on_new_job_id_callback:
-            warnings.warn(
-                "on_new_job_id_callback is Deprecated. Please start using on_new_job_callback",
-                AirflowProviderDeprecationWarning,
-                stacklevel=3,
-            )
-            on_new_job_id_callback(cast(str, job.get("id")))
-
-        if on_new_job_callback:
-            on_new_job_callback(job)
-
-        jobs_controller.wait_for_done()
-        return jobs_controller.get_jobs(refresh=True)[0]
-
     @GoogleBaseHook.fallback_to_default_project_id
     def get_job(
         self,
@@ -1427,8 +1273,7 @@
             location=location,
         )
         job = job_controller.fetch_job_by_id(job_id)
-
-        return job_controller._check_dataflow_job_state(job)
+        return job_controller.job_reached_terminal_state(job)
 
     @GoogleBaseHook.fallback_to_default_project_id
     def create_data_pipeline(
@@ -1557,7 +1402,7 @@
         return f"projects/{project_id}/locations/{location}"
 
 
-class AsyncDataflowHook(GoogleBaseAsyncHook):
+class AsyncDataflowHook(GoogleBaseAsyncHook, DataflowJobTerminalStateHelper):
     """Async hook class for dataflow service."""
 
     sync_hook_class = DataflowHook
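With `start_java_dataflow`, `start_python_dataflow`, and `start_sql_job` removed, the deprecation notices in the deleted code point to `BeamHook.start_java_pipeline`/`start_python_pipeline` plus `DataflowHook.wait_for_done`. A hedged migration sketch for the Python case, reusing the argument names visible in the removed method body (project, bucket, and region values are placeholders):

```python
from airflow.providers.apache.beam.hooks.beam import BeamHook, BeamRunnerType
from airflow.providers.google.cloud.hooks.dataflow import DataflowHook

dataflow_hook = DataflowHook(gcp_conn_id="google_cloud_default")
beam_hook = BeamHook(runner=BeamRunnerType.DataflowRunner)

job_name = DataflowHook.build_dataflow_job_name("example-wordcount")  # appends a unique suffix
variables = {
    "project": "my-project",   # placeholder project ID
    "region": "us-central1",
    "job_name": job_name,
}

# Start the pipeline through the Beam provider instead of the removed
# DataflowHook.start_python_dataflow ...
beam_hook.start_python_pipeline(
    variables=variables,
    py_file="gs://my-bucket/wordcount.py",  # placeholder pipeline file
    py_options=[],
    py_interpreter="python3",
    py_requirements=None,
    py_system_site_packages=False,
)

# ... then poll Dataflow until the job reaches its expected terminal state.
dataflow_hook.wait_for_done(job_name=job_name, location="us-central1")
```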
airflow/providers/google/cloud/hooks/dataform.py

@@ -20,8 +20,6 @@ import time
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from airflow.exceptions import AirflowException
-from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
 from google.cloud.dataform_v1beta1 import DataformClient
 from google.cloud.dataform_v1beta1.types import (
@@ -33,6 +31,9 @@ from google.cloud.dataform_v1beta1.types import (
     WriteFileResponse,
 )
 
+from airflow.exceptions import AirflowException
+from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
+
 if TYPE_CHECKING:
     from google.api_core.retry import Retry
     from google.cloud.dataform_v1beta1.services.dataform.pagers import QueryWorkflowInvocationActionsPager
@@ -452,8 +453,7 @@ class DataformHook(GoogleBaseHook):
         """
         client = self.get_dataform_client()
         workspace_path = (
-            f"projects/{project_id}/locations/{region}/"
-            f"repositories/{repository_id}/workspaces/{workspace_id}"
+            f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
         )
         request = {
             "name": workspace_path,
@@ -495,8 +495,7 @@ class DataformHook(GoogleBaseHook):
         """
         client = self.get_dataform_client()
         workspace_path = (
-            f"projects/{project_id}/locations/{region}/"
-            f"repositories/{repository_id}/workspaces/{workspace_id}"
+            f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
        )
         request = {
             "workspace": workspace_path,
@@ -541,8 +540,7 @@ class DataformHook(GoogleBaseHook):
         """
         client = self.get_dataform_client()
         workspace_path = (
-            f"projects/{project_id}/locations/{region}/"
-            f"repositories/{repository_id}/workspaces/{workspace_id}"
+            f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
         )
         request = {
             "workspace": workspace_path,
@@ -586,8 +584,7 @@ class DataformHook(GoogleBaseHook):
         """
         client = self.get_dataform_client()
         workspace_path = (
-            f"projects/{project_id}/locations/{region}/"
-            f"repositories/{repository_id}/workspaces/{workspace_id}"
+            f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
         )
         request = {
             "workspace": workspace_path,
@@ -628,8 +625,7 @@ class DataformHook(GoogleBaseHook):
         """
         client = self.get_dataform_client()
         workspace_path = (
-            f"projects/{project_id}/locations/{region}/"
-            f"repositories/{repository_id}/workspaces/{workspace_id}"
+            f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
        )
         request = {
             "workspace": workspace_path,
@@ -670,8 +666,7 @@ class DataformHook(GoogleBaseHook):
         """
         client = self.get_dataform_client()
         workspace_path = (
-            f"projects/{project_id}/locations/{region}/"
-            f"repositories/{repository_id}/workspaces/{workspace_id}"
+            f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
         )
         request = {
             "workspace": workspace_path,
airflow/providers/google/cloud/hooks/datafusion.py

@@ -26,11 +26,12 @@ from collections.abc import Sequence
 from typing import Any
 from urllib.parse import quote, urlencode, urljoin
 
+import google.auth
 from aiohttp import ClientSession
 from gcloud.aio.auth import AioSession, Token
+from google.api_core.retry import exponential_sleep_generator
 from googleapiclient.discovery import Resource, build
 
-import google.auth
 from airflow.exceptions import AirflowException, AirflowNotFoundException
 from airflow.providers.google.cloud.utils.datafusion import DataFusionPipelineType
 from airflow.providers.google.common.hooks.base_google import (
@@ -38,7 +39,6 @@ from airflow.providers.google.common.hooks.base_google import (
     GoogleBaseAsyncHook,
     GoogleBaseHook,
 )
-from google.api_core.retry import exponential_sleep_generator
 
 Operation = dict[str, Any]
 
@@ -137,8 +137,7 @@ class DataFusionHook(GoogleBaseHook):
 
         # Time is up!
         raise AirflowException(
-            f"Pipeline {pipeline_name} state {current_state} is not "
-            f"one of {success_states} after {timeout}s"
+            f"Pipeline {pipeline_name} state {current_state} is not one of {success_states} after {timeout}s"
         )
 
     @staticmethod
@@ -171,9 +170,9 @@ class DataFusionHook(GoogleBaseHook):
     def _check_response_status_and_data(response, message: str) -> None:
         if response.status == 404:
             raise AirflowNotFoundException(message)
-
+        if response.status == 409:
             raise ConflictException("Conflict: Resource is still in use.")
-
+        if response.status != 200:
             raise AirflowException(message)
         if response.data is None:
             raise AirflowException(
@@ -435,7 +434,7 @@ class DataFusionHook(GoogleBaseHook):
         pipeline_id: str,
         pipeline_type: DataFusionPipelineType = DataFusionPipelineType.BATCH,
         namespace: str = "default",
-    ) ->
+    ) -> dict:
         url = os.path.join(
             self._base_url(instance_url, namespace),
             quote(pipeline_name),
@@ -573,8 +572,7 @@ class DataFusionAsyncHook(GoogleBaseAsyncHook):
             raise
         if pipeline:
             return pipeline
-
-        raise AirflowException("Could not retrieve pipeline. Aborting.")
+        raise AirflowException("Could not retrieve pipeline. Aborting.")
 
     async def get_pipeline(
         self,