apache-airflow-providers-google 14.1.0__py3-none-any.whl → 15.0.0rc1__py3-none-any.whl
This diff compares the published contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
- airflow/providers/google/__init__.py +1 -1
- airflow/providers/google/ads/hooks/ads.py +7 -33
- airflow/providers/google/ads/transfers/ads_to_gcs.py +1 -17
- airflow/providers/google/cloud/hooks/bigquery.py +6 -11
- airflow/providers/google/cloud/hooks/cloud_batch.py +1 -2
- airflow/providers/google/cloud/hooks/cloud_build.py +1 -54
- airflow/providers/google/cloud/hooks/compute.py +4 -3
- airflow/providers/google/cloud/hooks/dataflow.py +2 -139
- airflow/providers/google/cloud/hooks/dataform.py +6 -12
- airflow/providers/google/cloud/hooks/datafusion.py +1 -2
- airflow/providers/google/cloud/hooks/dataplex.py +1 -1
- airflow/providers/google/cloud/hooks/gcs.py +13 -5
- airflow/providers/google/cloud/hooks/life_sciences.py +1 -1
- airflow/providers/google/cloud/hooks/translate.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +3 -2
- airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +2 -272
- airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -1
- airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
- airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -1
- airflow/providers/google/cloud/links/cloud_storage_transfer.py +1 -3
- airflow/providers/google/cloud/links/dataproc.py +0 -1
- airflow/providers/google/cloud/log/gcs_task_handler.py +147 -115
- airflow/providers/google/cloud/openlineage/facets.py +32 -32
- airflow/providers/google/cloud/openlineage/mixins.py +2 -2
- airflow/providers/google/cloud/operators/automl.py +1 -1
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +0 -3
- airflow/providers/google/cloud/operators/datafusion.py +1 -22
- airflow/providers/google/cloud/operators/dataproc.py +1 -143
- airflow/providers/google/cloud/operators/dataproc_metastore.py +0 -1
- airflow/providers/google/cloud/operators/mlengine.py +3 -1406
- airflow/providers/google/cloud/operators/spanner.py +1 -2
- airflow/providers/google/cloud/operators/translate.py +2 -2
- airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +0 -12
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +1 -22
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +4 -3
- airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -1
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +1 -2
- airflow/providers/google/cloud/transfers/sftp_to_gcs.py +23 -10
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -2
- airflow/providers/google/common/auth_backend/google_openid.py +1 -1
- airflow/providers/google/common/hooks/base_google.py +7 -28
- airflow/providers/google/get_provider_info.py +3 -1
- airflow/providers/google/marketing_platform/sensors/display_video.py +1 -1
- airflow/providers/google/suite/hooks/drive.py +2 -2
- {apache_airflow_providers_google-14.1.0.dist-info → apache_airflow_providers_google-15.0.0rc1.dist-info}/METADATA +11 -9
- {apache_airflow_providers_google-14.1.0.dist-info → apache_airflow_providers_google-15.0.0rc1.dist-info}/RECORD +49 -50
- airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
- {apache_airflow_providers_google-14.1.0.dist-info → apache_airflow_providers_google-15.0.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-14.1.0.dist-info → apache_airflow_providers_google-15.0.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/__init__.py

@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "14.1.0"
+__version__ = "15.0.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.9.0"
airflow/providers/google/ads/hooks/ads.py

@@ -19,7 +19,6 @@
 
 from __future__ import annotations
 
-import warnings
 from functools import cached_property
 from tempfile import NamedTemporaryFile
 from typing import IO, TYPE_CHECKING, Any, Literal
@@ -28,7 +27,7 @@ from google.ads.googleads.client import GoogleAdsClient
 from google.ads.googleads.errors import GoogleAdsException
 from google.auth.exceptions import GoogleAuthError
 
-from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+from airflow.exceptions import AirflowException
 from airflow.hooks.base import BaseHook
 from airflow.providers.google.common.hooks.base_google import get_field
 
@@ -116,9 +115,7 @@ class GoogleAdsHook(BaseHook):
         self.google_ads_config: dict[str, Any] = {}
         self.authentication_method: Literal["service_account", "developer_token"] = "service_account"
 
-    def search(
-        self, client_ids: list[str], query: str, page_size: int | None = None, **kwargs
-    ) -> list[GoogleAdsRow]:
+    def search(self, client_ids: list[str], query: str, **kwargs) -> list[GoogleAdsRow]:
         """
         Pull data from the Google Ads API.
 
@@ -134,18 +131,14 @@ class GoogleAdsHook(BaseHook):
 
         :param client_ids: Google Ads client ID(s) to query the API for.
         :param query: Google Ads Query Language query.
-        :param page_size: Number of results to return per page. Max 10000 (for version 16 and 16.1)
-            This parameter deprecated. After February 05, 2025, it will be removed.
         :return: Google Ads API response, converted to Google Ads Row objects.
         """
-        data_proto_plus = self._search(client_ids, query, page_size, **kwargs)
+        data_proto_plus = self._search(client_ids, query, **kwargs)
         data_native_pb = [row._pb for row in data_proto_plus]
 
         return data_native_pb
 
-    def search_proto_plus(
-        self, client_ids: list[str], query: str, page_size: int | None = None, **kwargs
-    ) -> list[GoogleAdsRow]:
+    def search_proto_plus(self, client_ids: list[str], query: str, **kwargs) -> list[GoogleAdsRow]:
         """
         Pull data from the Google Ads API.
 
@@ -154,11 +147,9 @@ class GoogleAdsHook(BaseHook):
 
         :param client_ids: Google Ads client ID(s) to query the API for.
         :param query: Google Ads Query Language query.
-        :param page_size: Number of results to return per page. Max 10000 (for version 16 and 16.1)
-            This parameter is deprecated. After February 05, 2025, it will be removed.
         :return: Google Ads API response, converted to Google Ads Row objects
         """
-        return self._search(client_ids, query, page_size, **kwargs)
+        return self._search(client_ids, query, **kwargs)
 
     def list_accessible_customers(self) -> list[str]:
         """
@@ -269,37 +260,20 @@ class GoogleAdsHook(BaseHook):
 
             self.google_ads_config["json_key_file_path"] = secrets_temp.name
 
-    def _search(
-        self, client_ids: list[str], query: str, page_size: int | None = None, **kwargs
-    ) -> list[GoogleAdsRow]:
+    def _search(self, client_ids: list[str], query: str, **kwargs) -> list[GoogleAdsRow]:
         """
         Pull data from the Google Ads API.
 
         :param client_ids: Google Ads client ID(s) to query the API for.
         :param query: Google Ads Query Language query.
-        :param page_size: Number of results to return per page. Max 10000 (for version 16 and 16.1)
-            This parameter is deprecated. After February 05, 2025, it will be removed.
 
         :return: Google Ads API response, converted to Google Ads Row objects
         """
        service = self._get_service
 
-        extra_req_params = {}
-        if self.api_version == "v16":  # TODO: remove this after deprecation removal for page_size parameter
-            extra_req_params["page_size"] = page_size or 10000
-        else:
-            if page_size:
-                warnings.warn(
-                    "page_size parameter for the GoogleAdsHook.search and "
-                    "GoogleAdsHook.search_proto_plus method is deprecated and will be removed "
-                    "after February 05, 2025.",
-                    AirflowProviderDeprecationWarning,
-                    stacklevel=2,
-                )
-
         iterators = []
         for client_id in client_ids:
-            iterator = service.search(request={"customer_id": client_id, "query": query, **extra_req_params})
+            iterator = service.search(request={"customer_id": client_id, "query": query})
             iterators.append(iterator)
 
         self.log.info("Fetched Google Ads Iterators")
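The hunks above drop the deprecated page_size argument from GoogleAdsHook.search, search_proto_plus, and _search, along with the v16-only paging workaround. Callers upgrading to 15.0.0 simply stop passing the argument. A minimal sketch of the post-upgrade call, assuming a configured google_ads_default connection; the customer ID and GAQL query are illustrative:

    from airflow.providers.google.ads.hooks.ads import GoogleAdsHook

    hook = GoogleAdsHook(
        gcp_conn_id="google_cloud_default",
        google_ads_conn_id="google_ads_default",
    )
    rows = hook.search(
        client_ids=["1234567890"],  # illustrative customer ID
        query="SELECT campaign.id, campaign.name FROM campaign",
        # page_size=10000,  # accepted in 14.x, removed in 15.0.0: drop it
    )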
airflow/providers/google/ads/transfers/ads_to_gcs.py

@@ -17,13 +17,11 @@
 from __future__ import annotations
 
 import csv
-import warnings
 from collections.abc import Sequence
 from operator import attrgetter
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING
 
-from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator
 from airflow.providers.google.ads.hooks.ads import GoogleAdsHook
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
@@ -54,8 +52,6 @@ class GoogleAdsToGcsOperator(BaseOperator):
     :param obj: GCS path to save the object. Must be the full file path (ex. `path/to/file.txt`)
     :param gcp_conn_id: Airflow Google Cloud connection ID
     :param google_ads_conn_id: Airflow Google Ads connection ID
-    :param page_size: The number of results per API page request. Max 10,000 (for version 16 and 16.1)
-        This parameter deprecated. After March 01, 2025, it will be removed.
     :param gzip: Option to compress local file or file data for upload
     :param impersonation_chain: Optional service account to impersonate using short-term
         credentials, or chained list of accounts required to get the access_token
@@ -87,7 +83,6 @@ class GoogleAdsToGcsOperator(BaseOperator):
         obj: str,
         gcp_conn_id: str = "google_cloud_default",
         google_ads_conn_id: str = "google_ads_default",
-        page_size: int | None = None,
         gzip: bool = False,
         impersonation_chain: str | Sequence[str] | None = None,
         api_version: str | None = None,
@@ -101,8 +96,6 @@ class GoogleAdsToGcsOperator(BaseOperator):
         self.obj = obj
         self.gcp_conn_id = gcp_conn_id
         self.google_ads_conn_id = google_ads_conn_id
-        # TODO: remove this after deprecation removal for page_size parameter
-        self.page_size = page_size or 10000 if api_version == "v16" else None
         self.gzip = gzip
         self.impersonation_chain = impersonation_chain
         self.api_version = api_version
@@ -114,16 +107,7 @@ class GoogleAdsToGcsOperator(BaseOperator):
             api_version=self.api_version,
         )
 
-        if self.page_size is None:
-            warnings.warn(
-                "page_size parameter for the GoogleAdsToGcsOperator is deprecated and will be removed "
-                "after March 01, 2025.",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
-            rows = service.search(client_ids=self.client_ids, query=self.query)
-        else:
-            rows = service.search(client_ids=self.client_ids, query=self.query, page_size=self.page_size)
+        rows = service.search(client_ids=self.client_ids, query=self.query)
 
         try:
             getter = attrgetter(*self.attributes)
airflow/providers/google/cloud/hooks/bigquery.py

@@ -56,7 +56,6 @@ from google.cloud.exceptions import NotFound
 from googleapiclient.discovery import build
 from pandas_gbq import read_gbq
 from pandas_gbq.gbq import GbqConnector  # noqa: F401 used in ``airflow.contrib.hooks.bigquery``
-from requests import Session
 from sqlalchemy import create_engine
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -80,6 +79,7 @@ if TYPE_CHECKING:
     import pandas as pd
     from google.api_core.page_iterator import HTTPIterator
     from google.api_core.retry import Retry
+    from requests import Session
 
 log = logging.getLogger(__name__)
 
@@ -2116,7 +2116,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
             job_id=job_id,
             project=project_id,
             token=token,
-            session=cast(Session, session),
+            session=cast("Session", session),
         )
 
     async def _get_job(
@@ -2181,7 +2181,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
         async with ClientSession() as session:
             self.log.info("Executing get_job_output..")
             job_client = await self.get_job_instance(project_id, job_id, session)
-            job_query_response = await job_client.get_query_results(cast(Session, session))
+            job_query_response = await job_client.get_query_results(cast("Session", session))
             return job_query_response
 
     async def create_job_for_partition_get(
@@ -2201,7 +2201,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
             + (f" WHERE table_name='{table_id}'" if table_id else ""),
             "useLegacySql": False,
         }
-        job_query_resp = await job_client.query(query_request, cast(Session, session))
+        job_query_resp = await job_client.query(query_request, cast("Session", session))
         return job_query_resp["jobReference"]["jobId"]
 
     async def cancel_job(self, job_id: str, project_id: str | None, location: str | None) -> None:
@@ -2381,12 +2381,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
             test_results[metric] = float(ratios[metric]) < threshold
 
         self.log.info(
-            (
-                "Current metric for %s: %s\n"
-                "Past metric for %s: %s\n"
-                "Ratio for %s: %s\n"
-                "Threshold: %s\n"
-            ),
+            ("Current metric for %s: %s\nPast metric for %s: %s\nRatio for %s: %s\nThreshold: %s\n"),
             metric,
             cur,
             metric,
@@ -2451,5 +2446,5 @@ class BigQueryTableAsyncHook(GoogleBaseAsyncHook):
             table_name=table_id,
             project=project_id,
             token=token,
-            session=cast(Session, session),
+            session=cast("Session", session),
         )
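Every cast(Session, session) in this file becomes cast("Session", session), which is what allows the requests import to move into the TYPE_CHECKING block in the first two hunks: typing.cast never evaluates a string type argument at runtime. A self-contained sketch of the pattern, with illustrative names rather than the provider's own:

    from __future__ import annotations

    from typing import TYPE_CHECKING, cast

    if TYPE_CHECKING:
        # Evaluated only by static type checkers, never at runtime.
        from requests import Session


    def use_session(session: object) -> None:
        # The quoted type name is not looked up at runtime, so the
        # TYPE_CHECKING-only import above is all that is needed.
        typed = cast("Session", session)
        print(type(typed).__name__)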
airflow/providers/google/cloud/hooks/cloud_batch.py

@@ -155,8 +155,7 @@ class CloudBatchHook(GoogleBaseHook):
             raise AirflowException(message)
         elif status == JobStatus.State.DELETION_IN_PROGRESS:
             message = (
-                "Unexpected error in the operation: "
-                "Batch job with name {job_name} is being deleted."
+                "Unexpected error in the operation: Batch job with name {job_name} is being deleted."
             )
             raise AirflowException(message)
         else:
airflow/providers/google/cloud/hooks/cloud_build.py

@@ -27,9 +27,8 @@ from google.api_core.exceptions import AlreadyExists
 from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
 from google.cloud.devtools.cloudbuild_v1 import CloudBuildAsyncClient, CloudBuildClient, GetBuildRequest
 
-from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+from airflow.exceptions import AirflowException
 from airflow.providers.google.common.consts import CLIENT_INFO
-from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
 from airflow.providers.google.common.hooks.operation_helpers import OperationHelper
 
@@ -178,58 +177,6 @@ class CloudBuildHook(GoogleBaseHook, OperationHelper):
 
         return operation, id_
 
-    @GoogleBaseHook.fallback_to_default_project_id
-    @deprecated(
-        planned_removal_date="March 01, 2025",
-        use_instead="create_build_without_waiting_for_result",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def create_build(
-        self,
-        build: dict | Build,
-        project_id: str = PROVIDE_PROJECT_ID,
-        wait: bool = True,
-        retry: Retry | _MethodDefault = DEFAULT,
-        timeout: float | None = None,
-        metadata: Sequence[tuple[str, str]] = (),
-    ) -> Build:
-        """
-        Start a build with the specified configuration.
-
-        :param build: The build resource to create. If a dict is provided, it must be of the same form
-            as the protobuf message `google.cloud.devtools.cloudbuild_v1.types.Build`
-        :param project_id: Optional, Google Cloud Project project_id where the function belongs.
-            If set to None or missing, the default project_id from the GCP connection is used.
-        :param wait: Optional, wait for operation to finish.
-        :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests
-            will not be retried.
-        :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete.
-            Note that if `retry` is specified, the timeout applies to each individual attempt.
-        :param metadata: Optional, additional metadata that is provided to the method.
-
-        """
-        client = self.get_conn()
-
-        self.log.info("Start creating build...")
-
-        operation = client.create_build(
-            request={"project_id": project_id, "build": build},
-            retry=retry,
-            timeout=timeout,
-            metadata=metadata,
-        )
-
-        id_ = self._get_build_id_from_operation(operation)
-
-        if not wait:
-            return self.get_build(id_=id_, project_id=project_id)
-
-        operation.result()
-
-        self.log.info("Build has been created: %s.", id_)
-
-        return self.get_build(id_=id_, project_id=project_id)
-
     @GoogleBaseHook.fallback_to_default_project_id
     def create_build_trigger(
         self,
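With the deprecated create_build removed, the decorator's use_instead target is the remaining path. A hedged sketch of the replacement flow follows; it assumes create_build_without_waiting_for_result returns an (operation, build id) pair, as the retained `return operation, id_` context above suggests, and the project ID is hypothetical:

    from airflow.providers.google.cloud.hooks.cloud_build import CloudBuildHook

    hook = CloudBuildHook()
    operation, build_id = hook.create_build_without_waiting_for_result(
        build={"steps": [{"name": "ubuntu", "args": ["echo", "hello"]}]},
        project_id="my-project",  # hypothetical project
    )
    operation.result()  # block until completion, as create_build(wait=True) did
    build = hook.get_build(id_=build_id, project_id="my-project")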
airflow/providers/google/cloud/hooks/compute.py

@@ -29,6 +29,7 @@ from google.cloud.compute_v1.services.instances import InstancesClient
 from googleapiclient.discovery import build
 
 from airflow.exceptions import AirflowException
+from airflow.providers.google.common.consts import CLIENT_INFO
 from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
 
 if TYPE_CHECKING:
@@ -85,15 +86,15 @@ class ComputeEngineHook(GoogleBaseHook):
 
     def get_compute_instance_template_client(self):
         """Return Compute Engine Instance Template Client."""
-        return InstanceTemplatesClient(credentials=self.get_credentials(), client_info=
+        return InstanceTemplatesClient(credentials=self.get_credentials(), client_info=CLIENT_INFO)
 
     def get_compute_instance_client(self):
         """Return Compute Engine Instance Client."""
-        return InstancesClient(credentials=self.get_credentials(), client_info=
+        return InstancesClient(credentials=self.get_credentials(), client_info=CLIENT_INFO)
 
     def get_compute_instance_group_managers_client(self):
         """Return Compute Engine Instance Group Managers Client."""
-        return InstanceGroupManagersClient(credentials=self.get_credentials(), client_info=
+        return InstanceGroupManagersClient(credentials=self.get_credentials(), client_info=CLIENT_INFO)
 
     @GoogleBaseHook.fallback_to_default_project_id
     def insert_instance_template(
airflow/providers/google/cloud/hooks/dataflow.py

@@ -51,7 +51,6 @@ from googleapiclient.discovery import Resource, build
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.apache.beam.hooks.beam import BeamHook, BeamRunnerType, beam_options_to_args
-from airflow.providers.google.common.deprecated import deprecated
 from airflow.providers.google.common.hooks.base_google import (
     PROVIDE_PROJECT_ID,
     GoogleBaseAsyncHook,
@@ -136,7 +135,7 @@ def _fallback_variable_parameter(parameter_name: str, variable_key_name: str) ->
 
             return func(self, *args, **kwargs)
 
-        return cast(T, inner_wrapper)
+        return cast("T", inner_wrapper)
 
     return _wrapper
 
@@ -586,66 +585,6 @@ class DataflowHook(GoogleBaseHook):
         http_authorized = self._authorize()
         return build("datapipelines", "v1", http=http_authorized, cache_discovery=False)
 
-    @_fallback_to_location_from_variables
-    @_fallback_to_project_id_from_variables
-    @GoogleBaseHook.fallback_to_default_project_id
-    @deprecated(
-        planned_removal_date="March 01, 2025",
-        use_instead="airflow.providers.apache.beam.hooks.beam.start.start_java_pipeline, "
-        "providers.google.cloud.hooks.dataflow.DataflowHook.wait_for_done",
-        instructions="Please use airflow.providers.apache.beam.hooks.beam.start.start_java_pipeline "
-        "to start pipeline and providers.google.cloud.hooks.dataflow.DataflowHook.wait_for_done method "
-        "to wait for the required pipeline state instead.",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def start_java_dataflow(
-        self,
-        job_name: str,
-        variables: dict,
-        jar: str,
-        project_id: str,
-        job_class: str | None = None,
-        append_job_name: bool = True,
-        multiple_jobs: bool = False,
-        on_new_job_id_callback: Callable[[str], None] | None = None,
-        location: str = DEFAULT_DATAFLOW_LOCATION,
-    ) -> None:
-        """
-        Start Dataflow java job.
-
-        :param job_name: The name of the job.
-        :param variables: Variables passed to the job.
-        :param project_id: Optional, the Google Cloud project ID in which to start a job.
-            If set to None or missing, the default project_id from the Google Cloud connection is used.
-        :param jar: Name of the jar for the job
-        :param job_class: Name of the java class for the job.
-        :param append_job_name: True if unique suffix has to be appended to job name.
-        :param multiple_jobs: True if to check for multiple job in dataflow
-        :param on_new_job_id_callback: Callback called when the job ID is known.
-        :param location: Job location.
-        """
-        name = self.build_dataflow_job_name(job_name, append_job_name)
-
-        variables["jobName"] = name
-        variables["region"] = location
-        variables["project"] = project_id
-
-        if "labels" in variables:
-            variables["labels"] = json.dumps(variables["labels"], separators=(",", ":"))
-
-        self.beam_hook.start_java_pipeline(
-            variables=variables,
-            jar=jar,
-            job_class=job_class,
-            process_line_callback=process_line_and_extract_dataflow_job_id_callback(on_new_job_id_callback),
-        )
-        self.wait_for_done(
-            job_name=name,
-            location=location,
-            job_id=self.job_id,
-            multiple_jobs=multiple_jobs,
-        )
-
     @_fallback_to_location_from_variables
     @_fallback_to_project_id_from_variables
     @GoogleBaseHook.fallback_to_default_project_id
@@ -1027,82 +966,6 @@ class DataflowHook(GoogleBaseHook):
             "While reading job object after template execution error occurred. Job object has no id."
         )
 
-    @_fallback_to_location_from_variables
-    @_fallback_to_project_id_from_variables
-    @GoogleBaseHook.fallback_to_default_project_id
-    @deprecated(
-        planned_removal_date="March 01, 2025",
-        use_instead="airflow.providers.apache.beam.hooks.beam.start.start_python_pipeline method, "
-        "providers.google.cloud.hooks.dataflow.DataflowHook.wait_for_done",
-        instructions="Please use airflow.providers.apache.beam.hooks.beam.start.start_python_pipeline method "
-        "to start pipeline and providers.google.cloud.hooks.dataflow.DataflowHook.wait_for_done method "
-        "to wait for the required pipeline state instead.",
-        category=AirflowProviderDeprecationWarning,
-    )
-    def start_python_dataflow(
-        self,
-        job_name: str,
-        variables: dict,
-        dataflow: str,
-        py_options: list[str],
-        project_id: str,
-        py_interpreter: str = "python3",
-        py_requirements: list[str] | None = None,
-        py_system_site_packages: bool = False,
-        append_job_name: bool = True,
-        on_new_job_id_callback: Callable[[str], None] | None = None,
-        location: str = DEFAULT_DATAFLOW_LOCATION,
-    ):
-        """
-        Start Dataflow job.
-
-        :param job_name: The name of the job.
-        :param variables: Variables passed to the job.
-        :param dataflow: Name of the Dataflow process.
-        :param py_options: Additional options.
-        :param project_id: The ID of the GCP project that owns the job.
-            If set to ``None`` or missing, the default project_id from the GCP connection is used.
-        :param py_interpreter: Python version of the beam pipeline.
-            If None, this defaults to the python3.
-            To track python versions supported by beam and related
-            issues check: https://issues.apache.org/jira/browse/BEAM-1251
-        :param py_requirements: Additional python package(s) to install.
-            If a value is passed to this parameter, a new virtual environment has been created with
-            additional packages installed.
-
-            You could also install the apache-beam package if it is not installed on your system or you want
-            to use a different version.
-        :param py_system_site_packages: Whether to include system_site_packages in your virtualenv.
-            See virtualenv documentation for more information.
-
-            This option is only relevant if the ``py_requirements`` parameter is not None.
-        :param append_job_name: True if unique suffix has to be appended to job name.
-        :param project_id: Optional, the Google Cloud project ID in which to start a job.
-            If set to None or missing, the default project_id from the Google Cloud connection is used.
-        :param on_new_job_id_callback: Callback called when the job ID is known.
-        :param location: Job location.
-        """
-        name = self.build_dataflow_job_name(job_name, append_job_name)
-        variables["job_name"] = name
-        variables["region"] = location
-        variables["project"] = project_id
-
-        self.beam_hook.start_python_pipeline(
-            variables=variables,
-            py_file=dataflow,
-            py_options=py_options,
-            py_interpreter=py_interpreter,
-            py_requirements=py_requirements,
-            py_system_site_packages=py_system_site_packages,
-            process_line_callback=process_line_and_extract_dataflow_job_id_callback(on_new_job_id_callback),
-        )
-
-        self.wait_for_done(
-            job_name=name,
-            location=location,
-            job_id=self.job_id,
-        )
-
     @staticmethod
     def build_dataflow_job_name(job_name: str, append_job_name: bool = True) -> str:
         """Build Dataflow job name."""
@@ -1271,7 +1134,7 @@ class DataflowHook(GoogleBaseHook):
                 AirflowProviderDeprecationWarning,
                 stacklevel=3,
             )
-            on_new_job_id_callback(cast(str, job.get("id")))
+            on_new_job_id_callback(cast("str", job.get("id")))
 
         if on_new_job_callback:
             on_new_job_callback(job)
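Both removed starters were thin wrappers that delegated to BeamHook and then polled wait_for_done; the quoted deprecation notices name exactly that replacement pair. A hedged sketch of the Python path, mirroring the removed start_python_dataflow body (bucket, project, and job names are hypothetical):

    from airflow.providers.apache.beam.hooks.beam import BeamHook, BeamRunnerType
    from airflow.providers.google.cloud.hooks.dataflow import DataflowHook

    beam_hook = BeamHook(runner=BeamRunnerType.DataflowRunner)
    dataflow_hook = DataflowHook(gcp_conn_id="google_cloud_default")

    # start_python_dataflow() used to populate these before delegating.
    variables = {"job_name": "example-job", "region": "us-central1", "project": "my-project"}
    beam_hook.start_python_pipeline(
        variables=variables,
        py_file="gs://my-bucket/pipeline.py",  # hypothetical pipeline location
        py_options=[],
        py_interpreter="python3",
        py_requirements=None,
        py_system_site_packages=False,
        process_line_callback=None,
    )
    dataflow_hook.wait_for_done(job_name="example-job", location="us-central1")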
airflow/providers/google/cloud/hooks/dataform.py

@@ -453,8 +453,7 @@ class DataformHook(GoogleBaseHook):
         """
         client = self.get_dataform_client()
         workspace_path = (
-            f"projects/{project_id}/locations/{region}/"
-            f"repositories/{repository_id}/workspaces/{workspace_id}"
+            f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
         )
         request = {
             "name": workspace_path,
@@ -496,8 +495,7 @@ class DataformHook(GoogleBaseHook):
         """
         client = self.get_dataform_client()
         workspace_path = (
-            f"projects/{project_id}/locations/{region}/"
-            f"repositories/{repository_id}/workspaces/{workspace_id}"
+            f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
         )
         request = {
             "workspace": workspace_path,
@@ -542,8 +540,7 @@ class DataformHook(GoogleBaseHook):
         """
         client = self.get_dataform_client()
         workspace_path = (
-            f"projects/{project_id}/locations/{region}/"
-            f"repositories/{repository_id}/workspaces/{workspace_id}"
+            f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
         )
         request = {
             "workspace": workspace_path,
@@ -587,8 +584,7 @@ class DataformHook(GoogleBaseHook):
         """
         client = self.get_dataform_client()
         workspace_path = (
-            f"projects/{project_id}/locations/{region}/"
-            f"repositories/{repository_id}/workspaces/{workspace_id}"
+            f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
        )
         request = {
             "workspace": workspace_path,
@@ -629,8 +625,7 @@ class DataformHook(GoogleBaseHook):
         """
         client = self.get_dataform_client()
         workspace_path = (
-            f"projects/{project_id}/locations/{region}/"
-            f"repositories/{repository_id}/workspaces/{workspace_id}"
+            f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
         )
         request = {
             "workspace": workspace_path,
@@ -671,8 +666,7 @@ class DataformHook(GoogleBaseHook):
         """
         client = self.get_dataform_client()
         workspace_path = (
-            f"projects/{project_id}/locations/{region}/"
-            f"repositories/{repository_id}/workspaces/{workspace_id}"
+            f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
         )
         request = {
             "workspace": workspace_path,
airflow/providers/google/cloud/hooks/datafusion.py

@@ -137,8 +137,7 @@ class DataFusionHook(GoogleBaseHook):
 
         # Time is up!
         raise AirflowException(
-            f"Pipeline {pipeline_name} state {current_state} is not "
-            f"one of {success_states} after {timeout}s"
+            f"Pipeline {pipeline_name} state {current_state} is not one of {success_states} after {timeout}s"
         )
 
     @staticmethod
airflow/providers/google/cloud/hooks/dataplex.py

@@ -137,7 +137,7 @@ class DataplexHook(GoogleBaseHook, OperationHelper):
             credentials=self.get_credentials(), client_info=CLIENT_INFO, client_options=client_options
         )
 
-    def wait_for_operation(self, timeout: float | None, operation: Operation):
+    def wait_for_operation(self, operation: Operation, timeout: float | None = None):
         """Wait for long-lasting operation to complete."""
         try:
             return operation.result(timeout=timeout)
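Note the parameter order flip in wait_for_operation: positional callers written against 14.x pass arguments into the wrong slots under 15.0.0, while keyword callers are unaffected. For illustration:

    # 14.1.0:  hook.wait_for_operation(300, operation)   # (timeout, operation)
    # 15.0.0:  hook.wait_for_operation(operation, 300)   # (operation, timeout=...)
    # Keyword form works on both sides of the upgrade:
    result = hook.wait_for_operation(operation=operation, timeout=300)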
airflow/providers/google/cloud/hooks/gcs.py

@@ -41,7 +41,6 @@ from google.api_core.exceptions import GoogleAPICallError, NotFound
 from google.cloud import storage  # type: ignore[attr-defined]
 from google.cloud.exceptions import GoogleCloudError
 from google.cloud.storage.retry import DEFAULT_RETRY
-from requests import Session
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.common.compat.lineage.hook import get_hook_lineage_collector
@@ -62,6 +61,7 @@ if TYPE_CHECKING:
     from aiohttp import ClientSession
     from google.api_core.retry import Retry
     from google.cloud.storage.blob import Blob
+    from requests import Session
 
 
 RT = TypeVar("RT")
@@ -135,16 +135,16 @@ def _fallback_object_url_to_object_name_and_bucket_name(
 
             return func(self, *args, **kwargs)
 
-        return cast(Callable[FParams, RT], _inner_wrapper)
+        return cast("Callable[FParams, RT]", _inner_wrapper)
 
-    return cast(Callable[[T], T], _wrapper)
+    return cast("Callable[[T], T]", _wrapper)
 
 
 # A fake bucket to use in functions decorated by _fallback_object_url_to_object_name_and_bucket_name.
 # This allows the 'bucket' argument to be of type str instead of str | None,
 # making it easier to type hint the function body without dealing with the None
 # case that can never happen at runtime.
-PROVIDE_BUCKET: str = cast(str, None)
+PROVIDE_BUCKET: str = cast("str", None)
 
 
 class GCSHook(GoogleBaseHook):
@@ -726,6 +726,14 @@ class GCSHook(GoogleBaseHook):
 
         self.log.info("Blob %s deleted.", object_name)
 
+    def get_bucket(self, bucket_name: str) -> storage.Bucket:
+        """
+        Get a bucket object from the Google Cloud Storage.
+
+        :param bucket_name: name of the bucket
+        """
+        return self.get_conn().bucket(bucket_name)
+
     def delete_bucket(self, bucket_name: str, force: bool = False, user_project: str | None = None) -> None:
         """
         Delete a bucket object from the Google Cloud Storage.
@@ -1494,5 +1502,5 @@ class GCSAsyncHook(GoogleBaseAsyncHook):
         token = await self.get_token(session=session)
         return Storage(
             token=token,
-            session=cast(Session, session),
+            session=cast("Session", session),
         )
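The one behavioral addition here is GCSHook.get_bucket, a thin wrapper over the storage client's bucket() factory, which builds a Bucket reference without making an API call. A minimal usage sketch with hypothetical names:

    from airflow.providers.google.cloud.hooks.gcs import GCSHook

    hook = GCSHook(gcp_conn_id="google_cloud_default")
    bucket = hook.get_bucket("my-bucket")  # hypothetical bucket name
    blob = bucket.blob("path/to/file.txt")  # standard google-cloud-storage API from here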
airflow/providers/google/cloud/hooks/life_sciences.py

@@ -34,7 +34,7 @@ TIME_TO_SLEEP_IN_SECONDS = 5
 
 
 @deprecated(
-    planned_removal_date="
+    planned_removal_date="July 08, 2025",
     use_instead="Google Cloud Batch Operators' hook",
     reason="The Life Sciences API (beta) will be discontinued on July 8, 2025 "
     "in favor of Google Cloud Batch.",