apache-airflow-providers-google 10.16.0__py3-none-any.whl → 10.17.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +5 -4
- airflow/providers/google/ads/operators/ads.py +1 -0
- airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +1 -0
- airflow/providers/google/cloud/example_dags/example_cloud_task.py +1 -0
- airflow/providers/google/cloud/example_dags/example_facebook_ads_to_gcs.py +1 -0
- airflow/providers/google/cloud/example_dags/example_looker.py +1 -0
- airflow/providers/google/cloud/example_dags/example_presto_to_gcs.py +1 -0
- airflow/providers/google/cloud/example_dags/example_salesforce_to_gcs.py +1 -0
- airflow/providers/google/cloud/fs/gcs.py +1 -2
- airflow/providers/google/cloud/hooks/automl.py +1 -0
- airflow/providers/google/cloud/hooks/bigquery.py +87 -24
- airflow/providers/google/cloud/hooks/bigquery_dts.py +1 -0
- airflow/providers/google/cloud/hooks/bigtable.py +1 -0
- airflow/providers/google/cloud/hooks/cloud_build.py +1 -0
- airflow/providers/google/cloud/hooks/cloud_memorystore.py +1 -0
- airflow/providers/google/cloud/hooks/cloud_sql.py +1 -0
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +9 -4
- airflow/providers/google/cloud/hooks/compute.py +1 -0
- airflow/providers/google/cloud/hooks/compute_ssh.py +2 -2
- airflow/providers/google/cloud/hooks/dataflow.py +6 -5
- airflow/providers/google/cloud/hooks/datafusion.py +1 -0
- airflow/providers/google/cloud/hooks/datapipeline.py +1 -0
- airflow/providers/google/cloud/hooks/dataplex.py +1 -0
- airflow/providers/google/cloud/hooks/dataprep.py +1 -0
- airflow/providers/google/cloud/hooks/dataproc.py +3 -2
- airflow/providers/google/cloud/hooks/dataproc_metastore.py +1 -0
- airflow/providers/google/cloud/hooks/datastore.py +1 -0
- airflow/providers/google/cloud/hooks/dlp.py +1 -0
- airflow/providers/google/cloud/hooks/functions.py +1 -0
- airflow/providers/google/cloud/hooks/gcs.py +12 -5
- airflow/providers/google/cloud/hooks/kms.py +1 -0
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +178 -300
- airflow/providers/google/cloud/hooks/life_sciences.py +1 -0
- airflow/providers/google/cloud/hooks/looker.py +1 -0
- airflow/providers/google/cloud/hooks/mlengine.py +1 -0
- airflow/providers/google/cloud/hooks/natural_language.py +1 -0
- airflow/providers/google/cloud/hooks/os_login.py +1 -0
- airflow/providers/google/cloud/hooks/pubsub.py +1 -0
- airflow/providers/google/cloud/hooks/secret_manager.py +1 -0
- airflow/providers/google/cloud/hooks/spanner.py +1 -0
- airflow/providers/google/cloud/hooks/speech_to_text.py +1 -0
- airflow/providers/google/cloud/hooks/stackdriver.py +1 -0
- airflow/providers/google/cloud/hooks/text_to_speech.py +1 -0
- airflow/providers/google/cloud/hooks/translate.py +1 -0
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +1 -0
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +255 -3
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1 -0
- airflow/providers/google/cloud/hooks/vertex_ai/dataset.py +1 -0
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +1 -0
- airflow/providers/google/cloud/hooks/vertex_ai/generative_model.py +197 -0
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +9 -9
- airflow/providers/google/cloud/hooks/vertex_ai/pipeline_job.py +231 -12
- airflow/providers/google/cloud/hooks/video_intelligence.py +1 -0
- airflow/providers/google/cloud/hooks/vision.py +1 -0
- airflow/providers/google/cloud/links/automl.py +1 -0
- airflow/providers/google/cloud/links/bigquery.py +1 -0
- airflow/providers/google/cloud/links/bigquery_dts.py +1 -0
- airflow/providers/google/cloud/links/cloud_memorystore.py +1 -0
- airflow/providers/google/cloud/links/cloud_sql.py +1 -0
- airflow/providers/google/cloud/links/cloud_tasks.py +1 -0
- airflow/providers/google/cloud/links/compute.py +1 -0
- airflow/providers/google/cloud/links/datacatalog.py +1 -0
- airflow/providers/google/cloud/links/dataflow.py +1 -0
- airflow/providers/google/cloud/links/dataform.py +1 -0
- airflow/providers/google/cloud/links/datafusion.py +1 -0
- airflow/providers/google/cloud/links/dataplex.py +1 -0
- airflow/providers/google/cloud/links/dataproc.py +1 -0
- airflow/providers/google/cloud/links/kubernetes_engine.py +28 -0
- airflow/providers/google/cloud/links/mlengine.py +1 -0
- airflow/providers/google/cloud/links/pubsub.py +1 -0
- airflow/providers/google/cloud/links/spanner.py +1 -0
- airflow/providers/google/cloud/links/stackdriver.py +1 -0
- airflow/providers/google/cloud/links/workflows.py +1 -0
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +18 -4
- airflow/providers/google/cloud/operators/automl.py +1 -0
- airflow/providers/google/cloud/operators/bigquery.py +21 -0
- airflow/providers/google/cloud/operators/bigquery_dts.py +1 -0
- airflow/providers/google/cloud/operators/bigtable.py +1 -0
- airflow/providers/google/cloud/operators/cloud_base.py +1 -0
- airflow/providers/google/cloud/operators/cloud_build.py +1 -0
- airflow/providers/google/cloud/operators/cloud_memorystore.py +1 -0
- airflow/providers/google/cloud/operators/cloud_sql.py +1 -0
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +11 -5
- airflow/providers/google/cloud/operators/compute.py +1 -0
- airflow/providers/google/cloud/operators/dataflow.py +1 -0
- airflow/providers/google/cloud/operators/datafusion.py +1 -0
- airflow/providers/google/cloud/operators/datapipeline.py +1 -0
- airflow/providers/google/cloud/operators/dataprep.py +1 -0
- airflow/providers/google/cloud/operators/dataproc.py +3 -2
- airflow/providers/google/cloud/operators/dataproc_metastore.py +1 -0
- airflow/providers/google/cloud/operators/datastore.py +1 -0
- airflow/providers/google/cloud/operators/functions.py +1 -0
- airflow/providers/google/cloud/operators/gcs.py +1 -0
- airflow/providers/google/cloud/operators/kubernetes_engine.py +600 -4
- airflow/providers/google/cloud/operators/life_sciences.py +1 -0
- airflow/providers/google/cloud/operators/looker.py +1 -0
- airflow/providers/google/cloud/operators/mlengine.py +283 -259
- airflow/providers/google/cloud/operators/natural_language.py +1 -0
- airflow/providers/google/cloud/operators/pubsub.py +1 -0
- airflow/providers/google/cloud/operators/spanner.py +1 -0
- airflow/providers/google/cloud/operators/speech_to_text.py +1 -0
- airflow/providers/google/cloud/operators/text_to_speech.py +1 -0
- airflow/providers/google/cloud/operators/translate.py +1 -0
- airflow/providers/google/cloud/operators/translate_speech.py +1 -0
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +14 -7
- airflow/providers/google/cloud/operators/vertex_ai/batch_prediction_job.py +67 -13
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +26 -8
- airflow/providers/google/cloud/operators/vertex_ai/dataset.py +1 -0
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +306 -0
- airflow/providers/google/cloud/operators/vertex_ai/hyperparameter_tuning_job.py +29 -48
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +52 -17
- airflow/providers/google/cloud/operators/video_intelligence.py +1 -0
- airflow/providers/google/cloud/operators/vision.py +1 -0
- airflow/providers/google/cloud/secrets/secret_manager.py +1 -0
- airflow/providers/google/cloud/sensors/bigquery.py +1 -0
- airflow/providers/google/cloud/sensors/bigquery_dts.py +1 -0
- airflow/providers/google/cloud/sensors/bigtable.py +1 -0
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +1 -0
- airflow/providers/google/cloud/sensors/dataflow.py +1 -0
- airflow/providers/google/cloud/sensors/dataform.py +1 -0
- airflow/providers/google/cloud/sensors/datafusion.py +1 -0
- airflow/providers/google/cloud/sensors/dataplex.py +1 -0
- airflow/providers/google/cloud/sensors/dataprep.py +1 -0
- airflow/providers/google/cloud/sensors/dataproc.py +1 -0
- airflow/providers/google/cloud/sensors/gcs.py +1 -0
- airflow/providers/google/cloud/sensors/looker.py +1 -0
- airflow/providers/google/cloud/sensors/pubsub.py +1 -0
- airflow/providers/google/cloud/sensors/tasks.py +1 -0
- airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +1 -0
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +1 -0
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -0
- airflow/providers/google/cloud/transfers/bigquery_to_mysql.py +1 -0
- airflow/providers/google/cloud/transfers/bigquery_to_postgres.py +1 -0
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
- airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +1 -0
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +3 -2
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +1 -0
- airflow/providers/google/cloud/transfers/gcs_to_sftp.py +1 -0
- airflow/providers/google/cloud/transfers/local_to_gcs.py +1 -0
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +1 -0
- airflow/providers/google/cloud/transfers/mysql_to_gcs.py +1 -0
- airflow/providers/google/cloud/transfers/postgres_to_gcs.py +19 -1
- airflow/providers/google/cloud/transfers/s3_to_gcs.py +3 -5
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +1 -0
- airflow/providers/google/cloud/transfers/sql_to_gcs.py +4 -2
- airflow/providers/google/cloud/triggers/bigquery.py +4 -3
- airflow/providers/google/cloud/triggers/cloud_batch.py +1 -1
- airflow/providers/google/cloud/triggers/cloud_run.py +1 -0
- airflow/providers/google/cloud/triggers/cloud_sql.py +2 -0
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +14 -2
- airflow/providers/google/cloud/triggers/dataplex.py +1 -0
- airflow/providers/google/cloud/triggers/dataproc.py +1 -0
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +72 -2
- airflow/providers/google/cloud/triggers/mlengine.py +2 -0
- airflow/providers/google/cloud/triggers/pubsub.py +3 -3
- airflow/providers/google/cloud/triggers/vertex_ai.py +107 -15
- airflow/providers/google/cloud/utils/field_sanitizer.py +2 -1
- airflow/providers/google/cloud/utils/field_validator.py +1 -0
- airflow/providers/google/cloud/utils/helpers.py +1 -0
- airflow/providers/google/cloud/utils/mlengine_operator_utils.py +1 -0
- airflow/providers/google/cloud/utils/mlengine_prediction_summary.py +1 -0
- airflow/providers/google/cloud/utils/openlineage.py +1 -0
- airflow/providers/google/common/auth_backend/google_openid.py +1 -0
- airflow/providers/google/common/hooks/base_google.py +2 -1
- airflow/providers/google/common/hooks/discovery_api.py +1 -0
- airflow/providers/google/common/links/storage.py +1 -0
- airflow/providers/google/common/utils/id_token_credentials.py +1 -0
- airflow/providers/google/firebase/hooks/firestore.py +1 -0
- airflow/providers/google/get_provider_info.py +9 -3
- airflow/providers/google/go_module_utils.py +1 -0
- airflow/providers/google/leveldb/hooks/leveldb.py +8 -7
- airflow/providers/google/marketing_platform/example_dags/example_display_video.py +1 -0
- airflow/providers/google/marketing_platform/hooks/analytics_admin.py +1 -0
- airflow/providers/google/marketing_platform/hooks/campaign_manager.py +1 -0
- airflow/providers/google/marketing_platform/hooks/display_video.py +1 -0
- airflow/providers/google/marketing_platform/hooks/search_ads.py +1 -0
- airflow/providers/google/marketing_platform/operators/analytics.py +1 -0
- airflow/providers/google/marketing_platform/operators/analytics_admin.py +4 -2
- airflow/providers/google/marketing_platform/operators/campaign_manager.py +1 -0
- airflow/providers/google/marketing_platform/operators/display_video.py +1 -0
- airflow/providers/google/marketing_platform/operators/search_ads.py +1 -0
- airflow/providers/google/marketing_platform/sensors/campaign_manager.py +1 -0
- airflow/providers/google/marketing_platform/sensors/display_video.py +2 -1
- airflow/providers/google/marketing_platform/sensors/search_ads.py +1 -0
- airflow/providers/google/suite/hooks/calendar.py +1 -0
- airflow/providers/google/suite/hooks/drive.py +1 -0
- airflow/providers/google/suite/hooks/sheets.py +1 -0
- airflow/providers/google/suite/sensors/drive.py +1 -0
- airflow/providers/google/suite/transfers/gcs_to_gdrive.py +7 -0
- airflow/providers/google/suite/transfers/gcs_to_sheets.py +4 -1
- airflow/providers/google/suite/transfers/local_to_drive.py +1 -0
- {apache_airflow_providers_google-10.16.0.dist-info → apache_airflow_providers_google-10.17.0.dist-info}/METADATA +16 -11
- {apache_airflow_providers_google-10.16.0.dist-info → apache_airflow_providers_google-10.17.0.dist-info}/RECORD +196 -194
- {apache_airflow_providers_google-10.16.0.dist-info → apache_airflow_providers_google-10.17.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.16.0.dist-info → apache_airflow_providers_google-10.17.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/ads/hooks/ads.py

@@ -16,6 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 """This module contains Google Ad hook."""
+
 from __future__ import annotations
 
 from functools import cached_property

@@ -31,9 +32,9 @@ from airflow.hooks.base import BaseHook
 from airflow.providers.google.common.hooks.base_google import get_field
 
 if TYPE_CHECKING:
-    from google.ads.googleads.
-    from google.ads.googleads.
-    from google.ads.googleads.
+    from google.ads.googleads.v16.services.services.customer_service import CustomerServiceClient
+    from google.ads.googleads.v16.services.services.google_ads_service import GoogleAdsServiceClient
+    from google.ads.googleads.v16.services.types.google_ads_service import GoogleAdsRow
 from google.api_core.page_iterator import GRPCIterator

@@ -99,7 +100,7 @@ class GoogleAdsHook(BaseHook):
     :param api_version: The Google Ads API version to use.
     """
 
-    default_api_version = "
+    default_api_version = "v16"
 
     def __init__(
         self,
airflow/providers/google/cloud/fs/gcs.py

@@ -47,12 +47,11 @@ def get_fs(conn_id: str | None, storage_options: dict[str, str] | None = None) -
         return GCSFileSystem()
 
     g = GoogleBaseHook(gcp_conn_id=conn_id)
-    creds = g.get_credentials()
 
     options = {
         "project": g.project_id,
         "access": g.extras.get(GCS_ACCESS, "full_control"),
-        "token":
+        "token": g._get_access_token(),
         "consistency": g.extras.get(GCS_CONSISTENCY, "none"),
         "cache_timeout": g.extras.get(GCS_CACHE_TIMEOUT),
         "requester_pays": g.extras.get(GCS_REQUESTER_PAYS, False),
airflow/providers/google/cloud/hooks/bigquery.py

@@ -28,6 +28,7 @@ import time
 import uuid
 from copy import deepcopy
 from datetime import datetime, timedelta
+from functools import cached_property
 from typing import TYPE_CHECKING, Any, Iterable, Mapping, NoReturn, Sequence, Union, cast
 
 from aiohttp import ClientSession as ClientSession

@@ -56,6 +57,7 @@ from sqlalchemy import create_engine
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.common.sql.hooks.sql import DbApiHook
 from airflow.providers.google.cloud.utils.bigquery import bq_cast
+from airflow.providers.google.cloud.utils.credentials_provider import _get_scopes
 from airflow.providers.google.common.consts import CLIENT_INFO
 from airflow.providers.google.common.hooks.base_google import GoogleBaseAsyncHook, GoogleBaseHook, get_field

@@ -92,6 +94,8 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         Google BigQuery jobs.
     :param impersonation_chain: This is the optional service account to
         impersonate using short term credentials.
+    :param impersonation_scopes: Optional list of scopes for impersonated account.
+        Will override scopes from connection.
     :param labels: The BigQuery resource label.
     """
@@ -100,14 +104,50 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
     conn_type = "gcpbigquery"
     hook_name = "Google Bigquery"
 
+    @classmethod
+    def get_connection_form_widgets(cls) -> dict[str, Any]:
+        """Return connection widgets to add to connection form."""
+        from flask_appbuilder.fieldwidgets import BS3TextFieldWidget
+        from flask_babel import lazy_gettext
+        from wtforms import validators
+        from wtforms.fields.simple import BooleanField, StringField
+
+        from airflow.www.validators import ValidJson
+
+        connection_form_widgets = super().get_connection_form_widgets()
+        connection_form_widgets["use_legacy_sql"] = BooleanField(lazy_gettext("Use Legacy SQL"), default=True)
+        connection_form_widgets["location"] = StringField(
+            lazy_gettext("Location"), widget=BS3TextFieldWidget()
+        )
+        connection_form_widgets["priority"] = StringField(
+            lazy_gettext("Priority"),
+            default="INTERACTIVE",
+            widget=BS3TextFieldWidget(),
+            validators=[validators.AnyOf(["INTERACTIVE", "BATCH"])],
+        )
+        connection_form_widgets["api_resource_configs"] = StringField(
+            lazy_gettext("API Resource Configs"), widget=BS3TextFieldWidget(), validators=[ValidJson()]
+        )
+        connection_form_widgets["labels"] = StringField(
+            lazy_gettext("Labels"), widget=BS3TextFieldWidget(), validators=[ValidJson()]
+        )
+        connection_form_widgets["labels"] = StringField(
+            lazy_gettext("Labels"), widget=BS3TextFieldWidget(), validators=[ValidJson()]
+        )
+        return connection_form_widgets
+
+    @classmethod
+    def get_ui_field_behaviour(cls) -> dict[str, Any]:
+        """Return custom field behaviour."""
+        return super().get_ui_field_behaviour()
+
     def __init__(
         self,
-        gcp_conn_id: str = GoogleBaseHook.default_conn_name,
         use_legacy_sql: bool = True,
         location: str | None = None,
         priority: str = "INTERACTIVE",
         api_resource_configs: dict | None = None,
-
+        impersonation_scopes: str | Sequence[str] | None = None,
         labels: dict | None = None,
         **kwargs,
     ) -> None:
@@ -116,17 +156,25 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
             "The `delegate_to` parameter has been deprecated before and finally removed in this version"
             " of Google Provider. You MUST convert it to `impersonate_chain`"
         )
-        super().__init__(
-
-
-        )
-        self.use_legacy_sql = use_legacy_sql
-        self.location = location
-        self.priority = priority
+        super().__init__(**kwargs)
+        self.use_legacy_sql: bool = self._get_field("use_legacy_sql", use_legacy_sql)
+        self.location: str | None = self._get_field("location", location)
+        self.priority: str = self._get_field("priority", priority)
         self.running_job_id: str | None = None
-        self.api_resource_configs: dict = api_resource_configs or {}
-        self.labels = labels
-        self.
+        self.api_resource_configs: dict = self._get_field("api_resource_configs", api_resource_configs or {})
+        self.labels = self._get_field("labels", labels or {})
+        self.impersonation_scopes: str | Sequence[str] | None = impersonation_scopes
+
+    @cached_property
+    @deprecated(
+        reason=(
+            "`BigQueryHook.credentials_path` property is deprecated and will be removed in the future. "
+            "This property used for obtaining credentials path but no longer in actual use. "
+        ),
+        category=AirflowProviderDeprecationWarning,
+    )
+    def credentials_path(self) -> str:
+        return "bigquery_hook_credentials.json"
 
     def get_conn(self) -> BigQueryConnection:
         """Get a BigQuery PEP 249 connection object."""
@@ -167,18 +215,17 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
         """Override from ``DbApiHook`` for ``get_sqlalchemy_engine()``."""
         return f"bigquery://{self.project_id}"
 
-    def get_sqlalchemy_engine(self, engine_kwargs=None):
+    def get_sqlalchemy_engine(self, engine_kwargs: dict | None = None):
         """Create an SQLAlchemy engine object.
 
         :param engine_kwargs: Kwargs used in :func:`~sqlalchemy.create_engine`.
         """
         if engine_kwargs is None:
             engine_kwargs = {}
-
-        credentials_path = get_field(extras, "key_path")
+        credentials_path = get_field(self.extras, "key_path")
         if credentials_path:
             return create_engine(self.get_uri(), credentials_path=credentials_path, **engine_kwargs)
-        keyfile_dict = get_field(extras, "keyfile_dict")
+        keyfile_dict = get_field(self.extras, "keyfile_dict")
         if keyfile_dict:
             keyfile_content = keyfile_dict if isinstance(keyfile_dict, dict) else json.loads(keyfile_dict)
             return create_engine(self.get_uri(), credentials_info=keyfile_content, **engine_kwargs)
@@ -2290,7 +2337,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
             return f"Format exception for {var_name}: "
 
         if table_input.count(".") + table_input.count(":") > 3:
-            raise
+            raise ValueError(f"{var_print(var_name)}Use either : or . to specify project got {table_input}")
         cmpt = table_input.rsplit(":", 1)
         project_id = None
         rest = table_input

@@ -2302,7 +2349,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
             project_id = cmpt[0]
             rest = cmpt[1]
         else:
-            raise
+            raise ValueError(
                 f"{var_print(var_name)}Expect format of (<project:)<dataset>.<table>, got {table_input}"
             )
 
@@ -2318,7 +2365,7 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
             dataset_id = cmpt[0]
             table_id = cmpt[1]
         else:
-            raise
+            raise ValueError(
                 f"{var_print(var_name)} Expect format of (<project.|<project:)<dataset>.<table>, "
                 f"got {table_input}"
             )

@@ -2335,6 +2382,20 @@ class BigQueryHook(GoogleBaseHook, DbApiHook):
 
         return project_id, dataset_id, table_id
 
+    @property
+    def scopes(self) -> Sequence[str]:
+        """
+        Return OAuth 2.0 scopes.
+
+        :return: Returns the scope defined in impersonation_scopes, the connection configuration, or the default scope
+        """
+        scope_value: str | None
+        if self.impersonation_chain and self.impersonation_scopes:
+            scope_value = ",".join(self.impersonation_scopes)
+        else:
+            scope_value = self._get_field("scope", None)
+        return _get_scopes(scope_value)
+
 
 class BigQueryConnection:
     """BigQuery connection.
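The new scopes property prefers impersonation_scopes when an impersonation chain is configured and otherwise falls back to the connection's "scope" field (or the provider default) via _get_scopes(). A hedged sketch; the service account and scope are placeholders, and it assumes a usable default connection exists:

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(
        impersonation_chain="sa-name@example-project.iam.gserviceaccount.com",
        impersonation_scopes=["https://www.googleapis.com/auth/bigquery"],
    )
    # Both impersonation settings are present, so the custom scopes win.
    print(hook.scopes)  # ['https://www.googleapis.com/auth/bigquery']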
@@ -3088,7 +3149,7 @@ def split_tablename(
         return f"Format exception for {var_name}: "
 
     if table_input.count(".") + table_input.count(":") > 3:
-        raise
+        raise ValueError(f"{var_print(var_name)}Use either : or . to specify project got {table_input}")
     cmpt = table_input.rsplit(":", 1)
     project_id = None
     rest = table_input

@@ -3100,7 +3161,7 @@ def split_tablename(
         project_id = cmpt[0]
         rest = cmpt[1]
     else:
-        raise
+        raise ValueError(
             f"{var_print(var_name)}Expect format of (<project:)<dataset>.<table>, got {table_input}"
         )
 
@@ -3116,7 +3177,7 @@ def split_tablename(
         dataset_id = cmpt[0]
         table_id = cmpt[1]
     else:
-        raise
+        raise ValueError(
             f"{var_print(var_name)}Expect format of (<project.|<project:)<dataset>.<table>, got {table_input}"
         )
 
@@ -3185,7 +3246,7 @@ def _validate_src_fmt_configs(
         if k not in src_fmt_configs and k in valid_configs:
             src_fmt_configs[k] = v
 
-    for k
+    for k in src_fmt_configs:
         if k not in valid_configs:
             raise ValueError(f"{k} is not a valid src_fmt_configs for type {source_format}.")
@@ -3311,6 +3372,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
     async def create_job_for_partition_get(
         self,
         dataset_id: str | None,
+        table_id: str | None = None,
         project_id: str | None = None,
     ):
         """Create a new job and get the job_id using gcloud-aio."""

@@ -3320,7 +3382,8 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
 
         query_request = {
             "query": "SELECT partition_id "
-            f"FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.PARTITIONS`"
+            f"FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.PARTITIONS`"
+            + (f" WHERE table_id={table_id}" if table_id else ""),
             "useLegacySql": False,
         }
         job_query_resp = await job_client.query(query_request, cast(Session, session))
airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py

@@ -45,6 +45,7 @@ from googleapiclient.discovery import Resource, build
 from googleapiclient.errors import HttpError
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+from airflow.providers.google.common.consts import CLIENT_INFO
 from airflow.providers.google.common.hooks.base_google import GoogleBaseAsyncHook, GoogleBaseHook
 
 if TYPE_CHECKING:

@@ -508,14 +509,18 @@ class CloudDataTransferServiceAsyncHook(GoogleBaseAsyncHook):
         self.project_id = project_id
         self._client: StorageTransferServiceAsyncClient | None = None
 
-    def get_conn(self) -> StorageTransferServiceAsyncClient:
+    async def get_conn(self) -> StorageTransferServiceAsyncClient:
         """
         Return async connection to the Storage Transfer Service.
 
         :return: Google Storage Transfer asynchronous client.
        """
         if not self._client:
-
+            credentials = (await self.get_sync_hook()).get_credentials()
+            self._client = StorageTransferServiceAsyncClient(
+                credentials=credentials,
+                client_info=CLIENT_INFO,
+            )
         return self._client
 
     async def get_jobs(self, job_names: list[str]) -> ListTransferJobsAsyncPager:

@@ -525,7 +530,7 @@ class CloudDataTransferServiceAsyncHook(GoogleBaseAsyncHook):
         :param job_names: (Required) List of names of the jobs to be fetched.
         :return: Object that yields Transfer jobs.
         """
-        client = self.get_conn()
+        client = await self.get_conn()
         jobs_list_request = ListTransferJobsRequest(
             filter=json.dumps({"project_id": self.project_id, "job_names": job_names})
         )

@@ -540,7 +545,7 @@ class CloudDataTransferServiceAsyncHook(GoogleBaseAsyncHook):
         """
         latest_operation_name = job.latest_operation_name
         if latest_operation_name:
-            client = self.get_conn()
+            client = await self.get_conn()
             response_operation = await client.transport.operations_client.get_operation(latest_operation_name)
             operation = TransferOperation.deserialize(response_operation.metadata.value)
             return operation
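Because get_conn() is now a coroutine, every call site must await it, as get_jobs() and the operation lookup above now do. A hedged usage sketch; the project ID and job name are placeholders:

    import asyncio

    from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
        CloudDataTransferServiceAsyncHook,
    )

    async def list_jobs() -> None:
        hook = CloudDataTransferServiceAsyncHook(project_id="example-project")
        pager = await hook.get_jobs(job_names=["transferJobs/example-job"])
        async for job in pager:
            print(job.name)

    asyncio.run(list_jobs())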
airflow/providers/google/cloud/hooks/compute_ssh.py

@@ -281,7 +281,7 @@ class ComputeEngineSSHHook(SSHHook):
             if retry == self.max_retries:
                 raise AirflowException("Maximum retries exceeded. Aborting operation.")
             delay = random.randint(0, max_delay)
-            self.log.info(
+            self.log.info("Failed establish SSH connection, waiting %s seconds to retry...", delay)
             time.sleep(delay)
         if not sshclient:
             raise AirflowException("Unable to establish SSH connection.")

@@ -334,7 +334,7 @@ class ComputeEngineSSHHook(SSHHook):
         )
 
     def _authorize_os_login(self, pubkey):
-        username = self._oslogin_hook._get_credentials_email
+        username = self._oslogin_hook._get_credentials_email
         self.log.info("Importing SSH public key using OSLogin: user=%s", username)
         expiration = int((time.time() + self.expire_time) * 1000000)
         ssh_public_key = {"key": pubkey, "expiration_time_usec": expiration}
airflow/providers/google/cloud/hooks/dataflow.py

@@ -16,6 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 """This module contains a Google Dataflow Hook."""
+
 from __future__ import annotations
 
 import functools

@@ -251,7 +252,7 @@ class _DataflowJobsController(LoggingMixin):
             self._job_id = jobs[0]["id"]
             return jobs
         else:
-            raise
+            raise ValueError("Missing both dataflow job ID and name.")
 
     def fetch_job_by_id(self, job_id: str) -> dict:
         """

@@ -409,18 +410,18 @@ class _DataflowJobsController(LoggingMixin):
         else:
             terminal_states = DataflowJobStatus.TERMINAL_STATES | {DataflowJobStatus.JOB_STATE_RUNNING}
             if self._expected_terminal_state not in terminal_states:
-                raise
+                raise AirflowException(
                     f"Google Cloud Dataflow job's expected terminal state "
                     f"'{self._expected_terminal_state}' is invalid."
                     f" The value should be any of the following: {terminal_states}"
                 )
             elif is_streaming and self._expected_terminal_state == DataflowJobStatus.JOB_STATE_DONE:
-                raise
+                raise AirflowException(
                     "Google Cloud Dataflow job's expected terminal state cannot be "
                     "JOB_STATE_DONE while it is a streaming job"
                 )
             elif not is_streaming and self._expected_terminal_state == DataflowJobStatus.JOB_STATE_DRAINED:
-                raise
+                raise AirflowException(
                     "Google Cloud Dataflow job's expected terminal state cannot be "
                     "JOB_STATE_DRAINED while it is a batch job"
                 )

@@ -434,7 +435,7 @@ class _DataflowJobsController(LoggingMixin):
             return self._wait_until_finished is False
 
         self.log.debug("Current job: %s", job)
-        raise
+        raise AirflowException(
             f"Google Cloud Dataflow job {job['name']} is in an unexpected terminal state: {current_state}, "
             f"expected terminal state: {self._expected_terminal_state}"
         )
airflow/providers/google/cloud/hooks/dataproc.py

@@ -16,6 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 """This module contains a Google Cloud Dataproc hook."""
+
 from __future__ import annotations
 
 import time

@@ -157,10 +158,10 @@ class DataProcJobBuilder:
 
         :param main_jar: URI for the main file.
         :param main_class: Name of the main class.
-        :raises:
+        :raises: ValueError
         """
         if main_class is not None and main_jar is not None:
-            raise
+            raise ValueError("Set either main_jar or main_class")
         if main_jar:
             self.job["job"][self.job_type]["main_jar_file_uri"] = main_jar
         else: