apache-airflow-providers-google 15.1.0rc1__py3-none-any.whl → 19.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/3rd-party-licenses/NOTICE +2 -12
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/ads/hooks/ads.py +39 -5
- airflow/providers/google/ads/operators/ads.py +2 -2
- airflow/providers/google/ads/transfers/ads_to_gcs.py +2 -2
- airflow/providers/google/assets/gcs.py +1 -11
- airflow/providers/google/cloud/bundles/__init__.py +16 -0
- airflow/providers/google/cloud/bundles/gcs.py +161 -0
- airflow/providers/google/cloud/hooks/bigquery.py +166 -281
- airflow/providers/google/cloud/hooks/cloud_composer.py +287 -14
- airflow/providers/google/cloud/hooks/cloud_logging.py +109 -0
- airflow/providers/google/cloud/hooks/cloud_run.py +17 -9
- airflow/providers/google/cloud/hooks/cloud_sql.py +101 -22
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +27 -6
- airflow/providers/google/cloud/hooks/compute_ssh.py +5 -1
- airflow/providers/google/cloud/hooks/datacatalog.py +9 -1
- airflow/providers/google/cloud/hooks/dataflow.py +71 -94
- airflow/providers/google/cloud/hooks/datafusion.py +1 -1
- airflow/providers/google/cloud/hooks/dataplex.py +1 -1
- airflow/providers/google/cloud/hooks/dataprep.py +1 -1
- airflow/providers/google/cloud/hooks/dataproc.py +72 -71
- airflow/providers/google/cloud/hooks/gcs.py +111 -14
- airflow/providers/google/cloud/hooks/gen_ai.py +196 -0
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/looker.py +6 -1
- airflow/providers/google/cloud/hooks/mlengine.py +3 -2
- airflow/providers/google/cloud/hooks/secret_manager.py +102 -10
- airflow/providers/google/cloud/hooks/spanner.py +73 -8
- airflow/providers/google/cloud/hooks/stackdriver.py +10 -8
- airflow/providers/google/cloud/hooks/translate.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +0 -209
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +2 -2
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +27 -1
- airflow/providers/google/cloud/hooks/vertex_ai/experiment_service.py +202 -0
- airflow/providers/google/cloud/hooks/vertex_ai/feature_store.py +307 -7
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +79 -75
- airflow/providers/google/cloud/hooks/vertex_ai/ray.py +223 -0
- airflow/providers/google/cloud/hooks/vision.py +2 -2
- airflow/providers/google/cloud/hooks/workflows.py +1 -1
- airflow/providers/google/cloud/links/alloy_db.py +0 -46
- airflow/providers/google/cloud/links/base.py +77 -13
- airflow/providers/google/cloud/links/bigquery.py +0 -47
- airflow/providers/google/cloud/links/bigquery_dts.py +0 -20
- airflow/providers/google/cloud/links/bigtable.py +0 -48
- airflow/providers/google/cloud/links/cloud_build.py +0 -73
- airflow/providers/google/cloud/links/cloud_functions.py +0 -33
- airflow/providers/google/cloud/links/cloud_memorystore.py +0 -58
- airflow/providers/google/cloud/links/{life_sciences.py → cloud_run.py} +5 -27
- airflow/providers/google/cloud/links/cloud_sql.py +0 -33
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +17 -44
- airflow/providers/google/cloud/links/cloud_tasks.py +7 -26
- airflow/providers/google/cloud/links/compute.py +0 -58
- airflow/providers/google/cloud/links/data_loss_prevention.py +0 -169
- airflow/providers/google/cloud/links/datacatalog.py +23 -54
- airflow/providers/google/cloud/links/dataflow.py +0 -34
- airflow/providers/google/cloud/links/dataform.py +0 -64
- airflow/providers/google/cloud/links/datafusion.py +1 -96
- airflow/providers/google/cloud/links/dataplex.py +0 -154
- airflow/providers/google/cloud/links/dataprep.py +0 -24
- airflow/providers/google/cloud/links/dataproc.py +11 -95
- airflow/providers/google/cloud/links/datastore.py +0 -31
- airflow/providers/google/cloud/links/kubernetes_engine.py +9 -60
- airflow/providers/google/cloud/links/managed_kafka.py +0 -70
- airflow/providers/google/cloud/links/mlengine.py +0 -70
- airflow/providers/google/cloud/links/pubsub.py +0 -32
- airflow/providers/google/cloud/links/spanner.py +0 -33
- airflow/providers/google/cloud/links/stackdriver.py +0 -30
- airflow/providers/google/cloud/links/translate.py +17 -187
- airflow/providers/google/cloud/links/vertex_ai.py +28 -195
- airflow/providers/google/cloud/links/workflows.py +0 -52
- airflow/providers/google/cloud/log/gcs_task_handler.py +17 -9
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +9 -6
- airflow/providers/google/cloud/openlineage/CloudStorageTransferJobFacet.json +68 -0
- airflow/providers/google/cloud/openlineage/CloudStorageTransferRunFacet.json +60 -0
- airflow/providers/google/cloud/openlineage/DataFusionRunFacet.json +32 -0
- airflow/providers/google/cloud/openlineage/facets.py +102 -1
- airflow/providers/google/cloud/openlineage/mixins.py +10 -8
- airflow/providers/google/cloud/openlineage/utils.py +15 -1
- airflow/providers/google/cloud/operators/alloy_db.py +70 -55
- airflow/providers/google/cloud/operators/bigquery.py +73 -636
- airflow/providers/google/cloud/operators/bigquery_dts.py +3 -5
- airflow/providers/google/cloud/operators/bigtable.py +36 -7
- airflow/providers/google/cloud/operators/cloud_base.py +21 -1
- airflow/providers/google/cloud/operators/cloud_batch.py +2 -2
- airflow/providers/google/cloud/operators/cloud_build.py +75 -32
- airflow/providers/google/cloud/operators/cloud_composer.py +128 -40
- airflow/providers/google/cloud/operators/cloud_logging_sink.py +341 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +69 -43
- airflow/providers/google/cloud/operators/cloud_run.py +23 -5
- airflow/providers/google/cloud/operators/cloud_sql.py +8 -16
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +92 -11
- airflow/providers/google/cloud/operators/compute.py +8 -40
- airflow/providers/google/cloud/operators/datacatalog.py +157 -21
- airflow/providers/google/cloud/operators/dataflow.py +38 -15
- airflow/providers/google/cloud/operators/dataform.py +15 -5
- airflow/providers/google/cloud/operators/datafusion.py +41 -20
- airflow/providers/google/cloud/operators/dataplex.py +193 -109
- airflow/providers/google/cloud/operators/dataprep.py +1 -5
- airflow/providers/google/cloud/operators/dataproc.py +78 -35
- airflow/providers/google/cloud/operators/dataproc_metastore.py +96 -88
- airflow/providers/google/cloud/operators/datastore.py +22 -6
- airflow/providers/google/cloud/operators/dlp.py +6 -29
- airflow/providers/google/cloud/operators/functions.py +16 -7
- airflow/providers/google/cloud/operators/gcs.py +10 -8
- airflow/providers/google/cloud/operators/gen_ai.py +389 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +60 -99
- airflow/providers/google/cloud/operators/looker.py +1 -1
- airflow/providers/google/cloud/operators/managed_kafka.py +107 -52
- airflow/providers/google/cloud/operators/natural_language.py +1 -1
- airflow/providers/google/cloud/operators/pubsub.py +60 -14
- airflow/providers/google/cloud/operators/spanner.py +25 -12
- airflow/providers/google/cloud/operators/speech_to_text.py +1 -2
- airflow/providers/google/cloud/operators/stackdriver.py +1 -9
- airflow/providers/google/cloud/operators/tasks.py +1 -12
- airflow/providers/google/cloud/operators/text_to_speech.py +1 -2
- airflow/providers/google/cloud/operators/translate.py +40 -16
- airflow/providers/google/cloud/operators/translate_speech.py +1 -2
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +39 -19
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +29 -9
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +54 -26
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +70 -8
- airflow/providers/google/cloud/operators/vertex_ai/endpoint_service.py +43 -9
- airflow/providers/google/cloud/operators/vertex_ai/experiment_service.py +435 -0
- airflow/providers/google/cloud/operators/vertex_ai/feature_store.py +532 -1
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +135 -116
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +11 -9
- airflow/providers/google/cloud/operators/vertex_ai/model_service.py +57 -11
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +30 -7
- airflow/providers/google/cloud/operators/vertex_ai/ray.py +393 -0
- airflow/providers/google/cloud/operators/video_intelligence.py +1 -1
- airflow/providers/google/cloud/operators/vision.py +2 -2
- airflow/providers/google/cloud/operators/workflows.py +18 -15
- airflow/providers/google/cloud/sensors/bigquery.py +2 -2
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -2
- airflow/providers/google/cloud/sensors/bigtable.py +11 -4
- airflow/providers/google/cloud/sensors/cloud_composer.py +533 -29
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -2
- airflow/providers/google/cloud/sensors/dataflow.py +26 -9
- airflow/providers/google/cloud/sensors/dataform.py +2 -2
- airflow/providers/google/cloud/sensors/datafusion.py +4 -4
- airflow/providers/google/cloud/sensors/dataplex.py +2 -2
- airflow/providers/google/cloud/sensors/dataprep.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc.py +2 -2
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +2 -2
- airflow/providers/google/cloud/sensors/gcs.py +4 -4
- airflow/providers/google/cloud/sensors/looker.py +2 -2
- airflow/providers/google/cloud/sensors/pubsub.py +4 -4
- airflow/providers/google/cloud/sensors/tasks.py +2 -2
- airflow/providers/google/cloud/sensors/vertex_ai/feature_store.py +2 -2
- airflow/providers/google/cloud/sensors/workflows.py +2 -2
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +11 -8
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +4 -4
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +7 -3
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +12 -1
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +24 -10
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +104 -5
- airflow/providers/google/cloud/transfers/calendar_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/cassandra_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +20 -12
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/gcs_to_local.py +5 -3
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +10 -4
- airflow/providers/google/cloud/transfers/gdrive_to_gcs.py +6 -2
- airflow/providers/google/cloud/transfers/gdrive_to_local.py +2 -2
- airflow/providers/google/cloud/transfers/http_to_gcs.py +193 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -1
- airflow/providers/google/cloud/transfers/oracle_to_gcs.py +36 -11
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +42 -9
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +12 -6
- airflow/providers/google/cloud/transfers/salesforce_to_gcs.py +2 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +13 -4
- airflow/providers/google/cloud/transfers/sheets_to_gcs.py +3 -3
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +10 -10
- airflow/providers/google/cloud/triggers/bigquery.py +75 -34
- airflow/providers/google/cloud/triggers/cloud_build.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_composer.py +302 -46
- airflow/providers/google/cloud/triggers/cloud_run.py +2 -2
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +91 -1
- airflow/providers/google/cloud/triggers/dataflow.py +122 -0
- airflow/providers/google/cloud/triggers/datafusion.py +1 -1
- airflow/providers/google/cloud/triggers/dataplex.py +14 -2
- airflow/providers/google/cloud/triggers/dataproc.py +122 -52
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +45 -27
- airflow/providers/google/cloud/triggers/mlengine.py +1 -1
- airflow/providers/google/cloud/triggers/pubsub.py +15 -19
- airflow/providers/google/cloud/utils/bigquery_get_data.py +1 -1
- airflow/providers/google/cloud/utils/credentials_provider.py +1 -1
- airflow/providers/google/cloud/utils/field_validator.py +1 -2
- airflow/providers/google/common/auth_backend/google_openid.py +4 -4
- airflow/providers/google/common/deprecated.py +2 -1
- airflow/providers/google/common/hooks/base_google.py +27 -8
- airflow/providers/google/common/links/storage.py +0 -22
- airflow/providers/google/common/utils/get_secret.py +31 -0
- airflow/providers/google/common/utils/id_token_credentials.py +3 -4
- airflow/providers/google/firebase/operators/firestore.py +2 -2
- airflow/providers/google/get_provider_info.py +56 -52
- airflow/providers/google/go_module_utils.py +35 -3
- airflow/providers/google/leveldb/hooks/leveldb.py +26 -1
- airflow/providers/google/leveldb/operators/leveldb.py +2 -2
- airflow/providers/google/marketing_platform/hooks/display_video.py +3 -109
- airflow/providers/google/marketing_platform/links/analytics_admin.py +5 -14
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +1 -2
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +5 -5
- airflow/providers/google/marketing_platform/operators/display_video.py +28 -489
- airflow/providers/google/marketing_platform/operators/search_ads.py +2 -2
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +2 -2
- airflow/providers/google/marketing_platform/sensors/display_video.py +3 -63
- airflow/providers/google/suite/hooks/calendar.py +1 -1
- airflow/providers/google/suite/hooks/sheets.py +15 -1
- airflow/providers/google/suite/operators/sheets.py +8 -3
- airflow/providers/google/suite/sensors/drive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +2 -2
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +1 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +3 -3
- airflow/providers/google/suite/transfers/sql_to_sheets.py +5 -4
- airflow/providers/google/version_compat.py +15 -1
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/METADATA +92 -48
- apache_airflow_providers_google-19.1.0rc1.dist-info/RECORD +331 -0
- apache_airflow_providers_google-19.1.0rc1.dist-info/licenses/NOTICE +5 -0
- airflow/providers/google/cloud/hooks/automl.py +0 -673
- airflow/providers/google/cloud/hooks/life_sciences.py +0 -159
- airflow/providers/google/cloud/links/automl.py +0 -193
- airflow/providers/google/cloud/operators/automl.py +0 -1362
- airflow/providers/google/cloud/operators/life_sciences.py +0 -119
- airflow/providers/google/cloud/operators/mlengine.py +0 -112
- apache_airflow_providers_google-15.1.0rc1.dist-info/RECORD +0 -321
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-15.1.0rc1.dist-info → apache_airflow_providers_google-19.1.0rc1.dist-info}/entry_points.txt +0 -0
- {airflow/providers/google → apache_airflow_providers_google-19.1.0rc1.dist-info/licenses}/LICENSE +0 -0
airflow/providers/google/cloud/hooks/cloud_sql.py

@@ -38,7 +38,7 @@ from inspect import signature
 from pathlib import Path
 from subprocess import PIPE, Popen
 from tempfile import NamedTemporaryFile, _TemporaryFileWrapper, gettempdir
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, cast
 from urllib.parse import quote_plus

 import httpx
@@ -50,8 +50,14 @@ from googleapiclient.errors import HttpError
 # Number of retries - used by googleapiclient method calls to perform retries
 # For requests that are "retriable"
 from airflow.exceptions import AirflowException
-from airflow.
-
+from airflow.providers.google.version_compat import AIRFLOW_V_3_1_PLUS
+
+if AIRFLOW_V_3_1_PLUS:
+    from airflow.sdk import Connection
+else:
+    from airflow.models import Connection  # type: ignore[assignment,attr-defined,no-redef]
+
+from airflow.providers.common.compat.sdk import BaseHook
 from airflow.providers.google.cloud.hooks.secret_manager import (
     GoogleCloudSecretManagerHook,
 )
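
The hunk above gates the Connection import on the installed Airflow version. A minimal sketch of how a flag like AIRFLOW_V_3_1_PLUS is typically computed (the provider's actual helper lives in airflow/providers/google/version_compat.py and its exact implementation may differ):

    from packaging.version import Version

    from airflow import __version__ as AIRFLOW_VERSION

    # Compare against the base version so pre-release suffixes (rc1, dev0)
    # do not make a 3.1 build look older than 3.1.0.
    AIRFLOW_V_3_1_PLUS = Version(Version(AIRFLOW_VERSION).base_version) >= Version("3.1.0")
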
@@ -502,7 +508,7 @@ class CloudSqlProxyRunner(LoggingMixin):
     :param project_id: Optional id of the Google Cloud project to connect to - it overwrites
         default project id taken from the Google Cloud connection.
     :param sql_proxy_version: Specific version of SQL proxy to download
-        (for example 'v1.13'). By default latest version is downloaded.
+        (for example 'v1.13'). By default, latest version is downloaded.
     :param sql_proxy_binary_path: If specified, then proxy will be
         used from the path specified rather than dynamically generated. This means
         that if the binary is not present in that path it will also be downloaded.
@@ -687,7 +693,7 @@ class CloudSqlProxyRunner(LoggingMixin):
             self.log.info("Skipped removing proxy - it was not downloaded: %s", self.sql_proxy_path)
         if os.path.isfile(self.credentials_path):
             self.log.info("Removing generated credentials file %s", self.credentials_path)
-            # Here file cannot be
+            # Here file cannot be deleted by concurrent task (each task has its own copy)
             os.remove(self.credentials_path)

     def get_proxy_version(self) -> str | None:
@@ -847,8 +853,8 @@ class CloudSQLDatabaseHook(BaseHook):
             self.user = self._get_iam_db_login()
             self.password = self._generate_login_token(service_account=self.cloudsql_connection.login)
         else:
-            self.user = self.cloudsql_connection.login
-            self.password = self.cloudsql_connection.password
+            self.user = cast("str", self.cloudsql_connection.login)
+            self.password = cast("str", self.cloudsql_connection.password)
         self.public_ip = self.cloudsql_connection.host
         self.public_port = self.cloudsql_connection.port
         self.ssl_cert = ssl_cert
@@ -1045,15 +1051,26 @@ class CloudSQLDatabaseHook(BaseHook):
     def _quote(value) -> str | None:
         return quote_plus(value) if value else None

-    def
+    def _reserve_port(self):
         if self.use_proxy:
             if self.sql_proxy_use_tcp:
                 if not self.sql_proxy_tcp_port:
                     self.reserve_free_tcp_port()
             if not self.sql_proxy_unique_path:
                 self.sql_proxy_unique_path = self._generate_unique_path()
+
+    def _generate_connection_uri(self) -> str:
+        self._reserve_port()
         if not self.database_type:
             raise ValueError("The database_type should be set")
+        if not self.user:
+            raise AirflowException("The login parameter needs to be set in connection")
+        if not self.public_ip:
+            raise AirflowException("The location parameter needs to be set in connection")
+        if not self.password:
+            raise AirflowException("The password parameter needs to be set in connection")
+        if not self.database:
+            raise AirflowException("The database parameter needs to be set in connection")

         database_uris = CONNECTION_URIS[self.database_type]
         ssl_spec = None
@@ -1072,14 +1089,6 @@ class CloudSQLDatabaseHook(BaseHook):
                 ssl_spec = {"cert": self.sslcert, "key": self.sslkey, "ca": self.sslrootcert}
             else:
                 format_string = public_uris["non-ssl"]
-        if not self.user:
-            raise AirflowException("The login parameter needs to be set in connection")
-        if not self.public_ip:
-            raise AirflowException("The location parameter needs to be set in connection")
-        if not self.password:
-            raise AirflowException("The password parameter needs to be set in connection")
-        if not self.database:
-            raise AirflowException("The database parameter needs to be set in connection")

         connection_uri = format_string.format(
             user=quote_plus(self.user) if self.user else "",
@@ -1103,6 +1112,8 @@ class CloudSQLDatabaseHook(BaseHook):
         return connection_uri

     def _get_instance_socket_name(self) -> str:
+        if self.project_id is None:
+            raise ValueError("The project_id should not be none")
         return self.project_id + ":" + self.location + ":" + self.instance

     def _get_sqlproxy_instance_specification(self) -> str:
@@ -1111,6 +1122,69 @@ class CloudSQLDatabaseHook(BaseHook):
             instance_specification += f"=tcp:{self.sql_proxy_tcp_port}"
         return instance_specification

+    def _generate_connection_parameters(self) -> dict:
+        self._reserve_port()
+        if not self.database_type:
+            raise ValueError("The database_type should be set")
+        if not self.user:
+            raise AirflowException("The login parameter needs to be set in connection")
+        if not self.public_ip:
+            raise AirflowException("The location parameter needs to be set in connection")
+        if not self.password:
+            raise AirflowException("The password parameter needs to be set in connection")
+        if not self.database:
+            raise AirflowException("The database parameter needs to be set in connection")
+
+        connection_parameters = {}
+
+        connection_parameters["conn_type"] = self.database_type
+        connection_parameters["login"] = self.user
+        connection_parameters["password"] = self.password
+        connection_parameters["schema"] = self.database
+        connection_parameters["extra"] = {}
+
+        database_uris = CONNECTION_URIS[self.database_type]
+        if self.use_proxy:
+            proxy_uris = database_uris["proxy"]
+            if self.sql_proxy_use_tcp:
+                connection_parameters["host"] = "127.0.0.1"
+                connection_parameters["port"] = self.sql_proxy_tcp_port
+            else:
+                socket_path = f"{self.sql_proxy_unique_path}/{self._get_instance_socket_name()}"
+                if "localhost" in proxy_uris["socket"]:
+                    connection_parameters["host"] = "localhost"
+                    connection_parameters["extra"].update({"unix_socket": socket_path})
+                else:
+                    connection_parameters["host"] = socket_path
+        else:
+            public_uris = database_uris["public"]
+            if self.use_ssl:
+                connection_parameters["host"] = self.public_ip
+                connection_parameters["port"] = self.public_port
+                if "ssl_spec" in public_uris["ssl"]:
+                    connection_parameters["extra"].update(
+                        {
+                            "ssl": json.dumps(
+                                {"cert": self.sslcert, "key": self.sslkey, "ca": self.sslrootcert}
+                            )
+                        }
+                    )
+                else:
+                    connection_parameters["extra"].update(
+                        {
+                            "sslmode": "verify-ca",
+                            "sslcert": self.sslcert,
+                            "sslkey": self.sslkey,
+                            "sslrootcert": self.sslrootcert,
+                        }
+                    )
+            else:
+                connection_parameters["host"] = self.public_ip
+                connection_parameters["port"] = self.public_port
+        if connection_parameters.get("extra"):
+            connection_parameters["extra"] = json.dumps(connection_parameters["extra"])
+        return connection_parameters
+
     def create_connection(self) -> Connection:
         """
         Create a connection.
@@ -1118,8 +1192,11 @@ class CloudSQLDatabaseHook(BaseHook):
         Connection ID will be randomly generated according to whether it uses
         proxy, TCP, UNIX sockets, SSL.
         """
-
-
+        if AIRFLOW_V_3_1_PLUS:
+            kwargs = self._generate_connection_parameters()
+        else:
+            kwargs = {"uri": self._generate_connection_uri()}
+        connection = Connection(conn_id=self.db_conn_id, **kwargs)
         self.log.info("Creating connection %s", self.db_conn_id)
         return connection

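
A hedged illustration of the two create_connection() code paths above; the connection id is hypothetical and the rendered values depend entirely on the configured Cloud SQL connection:

    hook = CloudSQLDatabaseHook(gcp_cloudsql_conn_id="my_cloudsql_conn")  # hypothetical conn id
    conn = hook.create_connection()
    # On Airflow 3.1+ the Connection is built from discrete parameters:
    #   conn_type, login, password, schema, host, port, extra
    # On older Airflow it is built from a single URI such as
    #   "<scheme>://user:***@host:port/database" (scheme depends on database_type).
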
@@ -1135,6 +1212,8 @@ class CloudSQLDatabaseHook(BaseHook):
             raise ValueError("Proxy runner can only be retrieved in case of use_proxy = True")
         if not self.sql_proxy_unique_path:
             raise ValueError("The sql_proxy_unique_path should be set")
+        if self.project_id is None:
+            raise ValueError("The project_id should not be None")
         return CloudSqlProxyRunner(
             path_prefix=self.sql_proxy_unique_path,
             instance_specification=self._get_sqlproxy_instance_specification(),
@@ -1171,9 +1250,9 @@ class CloudSQLDatabaseHook(BaseHook):
             raise ValueError("The db_hook should be set")
         if not isinstance(self.db_hook, PostgresHook):
             raise ValueError(f"The db_hook should be PostgresHook and is {type(self.db_hook)}")
-        conn = getattr(self.db_hook, "conn")
-        if conn and conn.notices:
-            for output in
+        conn = getattr(self.db_hook, "conn", None)
+        if conn and hasattr(conn, "notices") and conn.notices:
+            for output in conn.notices:
                 self.log.info(output)

     def reserve_free_tcp_port(self) -> None:
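
For background, `notices` is a psycopg2-specific attribute: the driver collects server NOTICE/RAISE messages in a list on the connection object, so the added hasattr() check keeps the hook safe when no live connection exists yet or a different driver is in play. A minimal sketch of the defensive pattern, with `db_hook` standing in for any hook instance:

    conn = getattr(db_hook, "conn", None)  # None when no connection was opened yet
    if conn and hasattr(conn, "notices") and conn.notices:
        for notice in conn.notices:
            print(notice)
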
@@ -1208,7 +1287,7 @@ class CloudSQLDatabaseHook(BaseHook):
         cloud_sql_hook = CloudSQLHook(api_version="v1", gcp_conn_id=self.gcp_conn_id)

         with cloud_sql_hook.provide_authorized_gcloud():
-            proc = subprocess.run(cmd, capture_output=True)
+            proc = subprocess.run(cmd, check=False, capture_output=True)

         if proc.returncode != 0:
             stderr_last_20_lines = "\n".join(proc.stderr.decode().strip().splitlines()[-20:])
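
The check=False added here (and in the analogous dataflow.py call below) makes the existing behavior explicit rather than changing it: check defaults to False, so subprocess.run() was never raising CalledProcessError, and the surrounding code inspects proc.returncode and raises its own AirflowException. A minimal sketch of the pattern:

    import subprocess

    # check=False: a failing command does not raise; the exit code is handled by hand.
    proc = subprocess.run(["gcloud", "--version"], check=False, capture_output=True)
    if proc.returncode != 0:
        raise RuntimeError(proc.stderr.decode().strip())
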
airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py

@@ -36,13 +36,14 @@ from copy import deepcopy
 from datetime import timedelta
 from typing import TYPE_CHECKING, Any

-from google.api_core import protobuf_helpers
 from google.cloud.storage_transfer_v1 import (
     ListTransferJobsRequest,
+    RunTransferJobRequest,
     StorageTransferServiceAsyncClient,
     TransferJob,
     TransferOperation,
 )
+from google.protobuf.json_format import MessageToDict
 from googleapiclient.discovery import Resource, build
 from googleapiclient.errors import HttpError

@@ -55,10 +56,11 @@ from airflow.providers.google.common.hooks.base_google import (
 )

 if TYPE_CHECKING:
+    from google.api_core import operation_async
     from google.cloud.storage_transfer_v1.services.storage_transfer_service.pagers import (
         ListTransferJobsAsyncPager,
     )
-    from google.longrunning import operations_pb2
+    from google.longrunning import operations_pb2
     from proto import Message

 log = logging.getLogger(__name__)
@@ -603,7 +605,7 @@ class CloudDataTransferServiceAsyncHook(GoogleBaseAsyncHook):
         self,
         request_filter: dict | None = None,
         **kwargs,
-    ) -> list[
+    ) -> list[dict[str, Any]]:
         """
         Get a transfer operation in Google Storage Transfer Service.

@@ -660,7 +662,12 @@ class CloudDataTransferServiceAsyncHook(GoogleBaseAsyncHook):
         )

         transfer_operations = [
-
+            MessageToDict(
+                getattr(op, "_pb", op),
+                preserving_proto_field_name=True,
+                use_integers_for_enums=True,
+            )
+            for op in operations
         ]

         return transfer_operations
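
For context, MessageToDict operates on raw protobuf messages while the client yields proto-plus wrappers; getattr(op, "_pb", op) unwraps the underlying message when present and falls back to the object itself. A sketch of the conversion in isolation (the helper name is illustrative, not part of the hook):

    from google.protobuf.json_format import MessageToDict

    def operation_to_dict(op) -> dict:
        # Keep the proto field names (snake_case) and leave enums as integers,
        # matching the options used in the hunk above.
        return MessageToDict(
            getattr(op, "_pb", op),
            preserving_proto_field_name=True,
            use_integers_for_enums=True,
        )
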
@@ -677,7 +684,7 @@ class CloudDataTransferServiceAsyncHook(GoogleBaseAsyncHook):

     @staticmethod
     async def operations_contain_expected_statuses(
-        operations: list[
+        operations: list[dict[str, Any]], expected_statuses: set[str] | str
     ) -> bool:
         """
         Check whether an operation exists with the expected status.
@@ -696,7 +703,7 @@ class CloudDataTransferServiceAsyncHook(GoogleBaseAsyncHook):
         if not operations:
             return False

-        current_statuses = {
+        current_statuses = {TransferOperation.Status(op["metadata"]["status"]).name for op in operations}

         if len(current_statuses - expected_statuses_set) != len(current_statuses):
             return True
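
Because the operations are now plain dicts whose enum fields hold integers (use_integers_for_enums=True above), the status check recovers the symbolic name through the TransferOperation.Status enum. An illustration with a hypothetical metadata payload:

    from google.cloud.storage_transfer_v1 import TransferOperation

    op = {"metadata": {"status": int(TransferOperation.Status.IN_PROGRESS)}}
    status_name = TransferOperation.Status(op["metadata"]["status"]).name
    assert status_name == "IN_PROGRESS"
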
@@ -707,3 +714,17 @@ class CloudDataTransferServiceAsyncHook(GoogleBaseAsyncHook):
             f"Expected: {', '.join(expected_statuses_set)}"
         )
         return False
+
+    async def run_transfer_job(self, job_name: str) -> operation_async.AsyncOperation:
+        """
+        Run Google Storage Transfer Service job.
+
+        :param job_name: (Required) Name of the job to run.
+        """
+        client = await self.get_conn()
+        request = RunTransferJobRequest(
+            job_name=job_name,
+            project_id=self.project_id,
+        )
+        operation = await client.run_transfer_job(request=request)
+        return operation
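
A hedged usage sketch for the new async run_transfer_job(); the project and job names are hypothetical, and the hook is assumed to accept project_id the way the other hooks in this module do:

    import asyncio

    async def trigger_transfer_job() -> None:
        hook = CloudDataTransferServiceAsyncHook(project_id="my-project")
        # run_transfer_job returns a google.api_core long-running operation...
        operation = await hook.run_transfer_job("transferJobs/123456789")
        # ...which can be awaited to completion.
        await operation.result()

    asyncio.run(trigger_transfer_job())
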
airflow/providers/google/cloud/hooks/compute_ssh.py

@@ -31,7 +31,11 @@ from airflow.providers.google.cloud.hooks.compute import ComputeEngineHook
 from airflow.providers.google.cloud.hooks.os_login import OSLoginHook
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.providers.ssh.hooks.ssh import SSHHook
-
+
+try:
+    from airflow.sdk.definitions._internal.types import NOTSET, ArgNotSet
+except ImportError:
+    from airflow.utils.types import NOTSET, ArgNotSet  # type: ignore[attr-defined,no-redef]

 # Paramiko should be imported after airflow.providers.ssh. Then the import will fail with
 # cannot import "airflow.providers.ssh" and will be correctly discovered as optional feature
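
NOTSET and ArgNotSet implement Airflow's "no argument supplied" sentinel: ArgNotSet is the type and NOTSET its singleton instance, letting code distinguish "caller passed None" from "caller passed nothing". A minimal sketch of the idiom, independent of which of the two import locations is used (the function and default are hypothetical):

    def connect(timeout: float | None | ArgNotSet = NOTSET) -> None:
        if isinstance(timeout, ArgNotSet):
            timeout = 30.0  # hypothetical default, applied only when the caller said nothing
        ...
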
airflow/providers/google/cloud/hooks/datacatalog.py

@@ -32,8 +32,9 @@ from google.cloud.datacatalog import (
     TagTemplateField,
 )

-from airflow.exceptions import AirflowException
+from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.google.common.consts import CLIENT_INFO
+from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook

 if TYPE_CHECKING:
@@ -41,6 +42,13 @@ if TYPE_CHECKING:
     from google.protobuf.field_mask_pb2 import FieldMask


+@deprecated(
+    planned_removal_date="January 30, 2026",
+    use_instead="airflow.providers.google.cloud.hooks.dataplex.DataplexHook",
+    reason="The Data Catalog will be discontinued on January 30, 2026 "
+    "in favor of Dataplex Universal Catalog.",
+    category=AirflowProviderDeprecationWarning,
+)
 class CloudDataCatalogHook(GoogleBaseHook):
     """
     Hook for Google Cloud Data Catalog Service.
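
The class-level @deprecated decorator means any instantiation of CloudDataCatalogHook should now emit an AirflowProviderDeprecationWarning steering users to DataplexHook. A hedged sketch of the observable effect (the exact warning text is assembled by the provider's deprecated() helper):

    import warnings

    from airflow.exceptions import AirflowProviderDeprecationWarning
    from airflow.providers.google.cloud.hooks.datacatalog import CloudDataCatalogHook

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        CloudDataCatalogHook()  # assumes a default GCP connection is configured
    assert any(issubclass(w.category, AirflowProviderDeprecationWarning) for w in caught)
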
airflow/providers/google/cloud/hooks/dataflow.py

@@ -27,9 +27,9 @@ import subprocess
 import time
 import uuid
 import warnings
-from collections.abc import Generator, Sequence
+from collections.abc import Callable, Generator, Sequence
 from copy import deepcopy
-from typing import TYPE_CHECKING, Any,
+from typing import TYPE_CHECKING, Any, TypeVar, cast

 from google.cloud.dataflow_v1beta3 import (
     GetJobRequest,
@@ -51,14 +51,13 @@ from googleapiclient.discovery import Resource, build

 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.apache.beam.hooks.beam import BeamHook, BeamRunnerType, beam_options_to_args
-from airflow.providers.
+from airflow.providers.common.compat.sdk import timeout
 from airflow.providers.google.common.hooks.base_google import (
     PROVIDE_PROJECT_ID,
     GoogleBaseAsyncHook,
     GoogleBaseHook,
 )
 from airflow.utils.log.logging_mixin import LoggingMixin
-from airflow.utils.timeout import timeout

 if TYPE_CHECKING:
     from google.cloud.dataflow_v1beta3.services.jobs_v1_beta3.pagers import ListJobsAsyncPager
@@ -185,7 +184,67 @@ class DataflowJobType:
     JOB_TYPE_STREAMING = "JOB_TYPE_STREAMING"


-class
+class DataflowJobTerminalStateHelper(LoggingMixin):
+    """Helper to define and validate the dataflow job terminal state."""
+
+    @staticmethod
+    def expected_terminal_state_is_allowed(expected_terminal_state):
+        job_allowed_terminal_states = DataflowJobStatus.TERMINAL_STATES | {
+            DataflowJobStatus.JOB_STATE_RUNNING
+        }
+        if expected_terminal_state not in job_allowed_terminal_states:
+            raise AirflowException(
+                f"Google Cloud Dataflow job's expected terminal state "
+                f"'{expected_terminal_state}' is invalid."
+                f" The value should be any of the following: {job_allowed_terminal_states}"
+            )
+        return True
+
+    @staticmethod
+    def expected_terminal_state_is_valid_for_job_type(expected_terminal_state, is_streaming: bool):
+        if is_streaming:
+            invalid_terminal_state = DataflowJobStatus.JOB_STATE_DONE
+            job_type = "streaming"
+        else:
+            invalid_terminal_state = DataflowJobStatus.JOB_STATE_DRAINED
+            job_type = "batch"
+
+        if expected_terminal_state == invalid_terminal_state:
+            raise AirflowException(
+                f"Google Cloud Dataflow job's expected terminal state cannot be {invalid_terminal_state} while it is a {job_type} job"
+            )
+        return True
+
+    def job_reached_terminal_state(self, job, wait_until_finished=None, custom_terminal_state=None) -> bool:
+        """
+        Check the job reached terminal state, if job failed raise exception.
+
+        :return: True if job is done.
+        :raise: Exception
+        """
+        current_state = job["currentState"]
+        is_streaming = job.get("type") == DataflowJobType.JOB_TYPE_STREAMING
+        expected_terminal_state = (
+            DataflowJobStatus.JOB_STATE_RUNNING if is_streaming else DataflowJobStatus.JOB_STATE_DONE
+        )
+        if custom_terminal_state is not None:
+            expected_terminal_state = custom_terminal_state
+        self.expected_terminal_state_is_allowed(expected_terminal_state)
+        self.expected_terminal_state_is_valid_for_job_type(expected_terminal_state, is_streaming=is_streaming)
+        if current_state == expected_terminal_state:
+            if expected_terminal_state == DataflowJobStatus.JOB_STATE_RUNNING and wait_until_finished:
+                return False
+            return True
+        if current_state in DataflowJobStatus.AWAITING_STATES:
+            return wait_until_finished is False
+        self.log.debug("Current job: %s", job)
+        raise AirflowException(
+            f"Google Cloud Dataflow job {job['name']} is in an unexpected terminal state: {current_state}, "
+            f"expected terminal state: {expected_terminal_state}"
+        )
+
+
+class _DataflowJobsController(DataflowJobTerminalStateHelper):
     """
     Interface for communication with Google Cloud Dataflow API.

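
A hedged illustration of the new helper's semantics, using hypothetical job payloads shaped like Dataflow Jobs API responses:

    helper = DataflowJobTerminalStateHelper()

    # A batch job is done once it reaches JOB_STATE_DONE.
    batch_job = {"name": "example-job", "currentState": "JOB_STATE_DONE", "type": "JOB_TYPE_BATCH"}
    assert helper.job_reached_terminal_state(batch_job) is True

    # A streaming job counts as "done" when it reaches JOB_STATE_RUNNING,
    # unless the caller explicitly asked to wait for a real terminal state.
    streaming_job = {"name": "example-job", "currentState": "JOB_STATE_RUNNING", "type": "JOB_TYPE_STREAMING"}
    assert helper.job_reached_terminal_state(streaming_job) is True
    assert helper.job_reached_terminal_state(streaming_job, wait_until_finished=True) is False
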
@@ -462,7 +521,10 @@ class _DataflowJobsController(LoggingMixin):
         """Wait for result of submitted job."""
         self.log.info("Start waiting for done.")
         self._refresh_jobs()
-        while self._jobs and not all(
+        while self._jobs and not all(
+            self.job_reached_terminal_state(job, self._wait_until_finished, self._expected_terminal_state)
+            for job in self._jobs
+        ):
             self.log.info("Waiting for done. Sleep %s s", self._poll_sleep)
             time.sleep(self._poll_sleep)
             self._refresh_jobs()
@@ -943,7 +1005,7 @@ class DataflowHook(GoogleBaseHook):
         success_code = 0

         with self.provide_authorized_gcloud():
-            proc = subprocess.run(cmd, capture_output=True)
+            proc = subprocess.run(cmd, check=False, capture_output=True)

         if proc.returncode != success_code:
             stderr_last_20_lines = "\n".join(proc.stderr.decode().strip().splitlines()[-20:])
@@ -1063,90 +1125,6 @@ class DataflowHook(GoogleBaseHook):
         )
         jobs_controller.cancel()

-    @deprecated(
-        planned_removal_date="July 01, 2025",
-        use_instead="airflow.providers.google.cloud.hooks.dataflow.DataflowHook.launch_beam_yaml_job",
-        category=AirflowProviderDeprecationWarning,
-    )
-    @GoogleBaseHook.fallback_to_default_project_id
-    def start_sql_job(
-        self,
-        job_name: str,
-        query: str,
-        options: dict[str, Any],
-        project_id: str,
-        location: str = DEFAULT_DATAFLOW_LOCATION,
-        on_new_job_id_callback: Callable[[str], None] | None = None,
-        on_new_job_callback: Callable[[dict], None] | None = None,
-    ):
-        """
-        Start Dataflow SQL query.
-
-        :param job_name: The unique name to assign to the Cloud Dataflow job.
-        :param query: The SQL query to execute.
-        :param options: Job parameters to be executed.
-            For more information, look at:
-            `https://cloud.google.com/sdk/gcloud/reference/beta/dataflow/sql/query
-            <gcloud beta dataflow sql query>`__
-            command reference
-        :param location: The location of the Dataflow job (for example europe-west1)
-        :param project_id: The ID of the GCP project that owns the job.
-            If set to ``None`` or missing, the default project_id from the GCP connection is used.
-        :param on_new_job_id_callback: (Deprecated) Callback called when the job ID is known.
-        :param on_new_job_callback: Callback called when the job is known.
-        :return: the new job object
-        """
-        gcp_options = {
-            "project": project_id,
-            "format": "value(job.id)",
-            "job-name": job_name,
-            "region": location,
-        }
-        cmd = self._build_gcloud_command(
-            command=["gcloud", "dataflow", "sql", "query", query], parameters={**gcp_options, **options}
-        )
-        self.log.info("Executing command: %s", " ".join(shlex.quote(c) for c in cmd))
-        with self.provide_authorized_gcloud():
-            proc = subprocess.run(cmd, capture_output=True)
-        self.log.info("Output: %s", proc.stdout.decode())
-        self.log.warning("Stderr: %s", proc.stderr.decode())
-        self.log.info("Exit code %d", proc.returncode)
-        stderr_last_20_lines = "\n".join(proc.stderr.decode().strip().splitlines()[-20:])
-        if proc.returncode != 0:
-            raise AirflowException(
-                f"Process exit with non-zero exit code. Exit code: {proc.returncode} Error Details : "
-                f"{stderr_last_20_lines}"
-            )
-        job_id = proc.stdout.decode().strip()
-
-        self.log.info("Created job ID: %s", job_id)
-
-        jobs_controller = _DataflowJobsController(
-            dataflow=self.get_conn(),
-            project_number=project_id,
-            job_id=job_id,
-            location=location,
-            poll_sleep=self.poll_sleep,
-            num_retries=self.num_retries,
-            drain_pipeline=self.drain_pipeline,
-            wait_until_finished=self.wait_until_finished,
-        )
-        job = jobs_controller.get_jobs(refresh=True)[0]
-
-        if on_new_job_id_callback:
-            warnings.warn(
-                "on_new_job_id_callback is Deprecated. Please start using on_new_job_callback",
-                AirflowProviderDeprecationWarning,
-                stacklevel=3,
-            )
-            on_new_job_id_callback(cast("str", job.get("id")))
-
-        if on_new_job_callback:
-            on_new_job_callback(job)
-
-        jobs_controller.wait_for_done()
-        return jobs_controller.get_jobs(refresh=True)[0]
-
     @GoogleBaseHook.fallback_to_default_project_id
     def get_job(
         self,
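
The removed start_sql_job shelled out to the long-deprecated gcloud dataflow sql command; its own deprecation notice pointed to launch_beam_yaml_job as the replacement. A hedged migration sketch (parameter names are indicative of that method, but check the hook's current signature before relying on them; all values are hypothetical):

    hook = DataflowHook(gcp_conn_id="google_cloud_default")
    job_id = hook.launch_beam_yaml_job(
        job_name="example-job",
        yaml_pipeline_file="gs://my-bucket/pipeline.yaml",
        append_job_name=False,
        jinja_variables=None,
        options=None,
        project_id="my-project",
        location="us-central1",
    )
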
@@ -1295,8 +1273,7 @@ class DataflowHook(GoogleBaseHook):
             location=location,
         )
         job = job_controller.fetch_job_by_id(job_id)
-
-        return job_controller._check_dataflow_job_state(job)
+        return job_controller.job_reached_terminal_state(job)

     @GoogleBaseHook.fallback_to_default_project_id
     def create_data_pipeline(
@@ -1425,7 +1402,7 @@ class DataflowHook(GoogleBaseHook):
         return f"projects/{project_id}/locations/{location}"


-class AsyncDataflowHook(GoogleBaseAsyncHook):
+class AsyncDataflowHook(GoogleBaseAsyncHook, DataflowJobTerminalStateHelper):
     """Async hook class for dataflow service."""

     sync_hook_class = DataflowHook
airflow/providers/google/cloud/hooks/datafusion.py

@@ -434,7 +434,7 @@ class DataFusionHook(GoogleBaseHook):
         pipeline_id: str,
         pipeline_type: DataFusionPipelineType = DataFusionPipelineType.BATCH,
         namespace: str = "default",
-    ) ->
+    ) -> dict:
         url = os.path.join(
             self._base_url(instance_url, namespace),
             quote(pipeline_name),
airflow/providers/google/cloud/hooks/dataplex.py

@@ -515,7 +515,7 @@ class DataplexHook(GoogleBaseHook, OperationHelper):
             fields are non-required and omitted in the request body, their values are emptied.
         :param allow_missing: Optional. If set to true and entry doesn't exist, the service will create it.
         :param delete_missing_aspects: Optional. If set to true and the aspect_keys specify aspect
-            ranges, the service deletes any existing aspects from that range that
+            ranges, the service deletes any existing aspects from that range that were not provided
             in the request.
         :param aspect_keys: Optional. The map keys of the Aspects which the service should modify.
             It supports the following syntax:
airflow/providers/google/cloud/hooks/dataprep.py

@@ -28,7 +28,7 @@ import requests
 from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential

-from airflow.
+from airflow.providers.common.compat.sdk import BaseHook


 def _get_field(extras: dict, field_name: str) -> str | None: