apache-airflow-providers-google 14.1.0rc1__py3-none-any.whl → 15.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. airflow/providers/google/__init__.py +1 -1
  2. airflow/providers/google/ads/hooks/ads.py +7 -33
  3. airflow/providers/google/ads/transfers/ads_to_gcs.py +1 -17
  4. airflow/providers/google/cloud/hooks/bigquery.py +6 -11
  5. airflow/providers/google/cloud/hooks/cloud_batch.py +1 -2
  6. airflow/providers/google/cloud/hooks/cloud_build.py +1 -54
  7. airflow/providers/google/cloud/hooks/compute.py +4 -3
  8. airflow/providers/google/cloud/hooks/dataflow.py +2 -139
  9. airflow/providers/google/cloud/hooks/dataform.py +6 -12
  10. airflow/providers/google/cloud/hooks/datafusion.py +1 -2
  11. airflow/providers/google/cloud/hooks/dataplex.py +1 -1
  12. airflow/providers/google/cloud/hooks/gcs.py +13 -5
  13. airflow/providers/google/cloud/hooks/life_sciences.py +1 -1
  14. airflow/providers/google/cloud/hooks/translate.py +1 -1
  15. airflow/providers/google/cloud/hooks/vertex_ai/auto_ml.py +3 -2
  16. airflow/providers/google/cloud/hooks/vertex_ai/batch_prediction_job.py +1 -1
  17. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +2 -272
  18. airflow/providers/google/cloud/hooks/vertex_ai/endpoint_service.py +2 -1
  19. airflow/providers/google/cloud/hooks/vertex_ai/hyperparameter_tuning_job.py +1 -1
  20. airflow/providers/google/cloud/hooks/vertex_ai/model_service.py +2 -1
  21. airflow/providers/google/cloud/links/cloud_storage_transfer.py +1 -3
  22. airflow/providers/google/cloud/links/dataproc.py +0 -1
  23. airflow/providers/google/cloud/log/gcs_task_handler.py +147 -115
  24. airflow/providers/google/cloud/openlineage/facets.py +32 -32
  25. airflow/providers/google/cloud/openlineage/mixins.py +2 -2
  26. airflow/providers/google/cloud/operators/automl.py +1 -1
  27. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +0 -3
  28. airflow/providers/google/cloud/operators/datafusion.py +1 -22
  29. airflow/providers/google/cloud/operators/dataproc.py +1 -143
  30. airflow/providers/google/cloud/operators/dataproc_metastore.py +0 -1
  31. airflow/providers/google/cloud/operators/mlengine.py +3 -1406
  32. airflow/providers/google/cloud/operators/spanner.py +1 -2
  33. airflow/providers/google/cloud/operators/translate.py +2 -2
  34. airflow/providers/google/cloud/operators/vertex_ai/auto_ml.py +0 -12
  35. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +1 -22
  36. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +4 -3
  37. airflow/providers/google/cloud/sensors/dataproc_metastore.py +1 -1
  38. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +1 -2
  39. airflow/providers/google/cloud/transfers/sftp_to_gcs.py +23 -10
  40. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -2
  41. airflow/providers/google/common/auth_backend/google_openid.py +1 -1
  42. airflow/providers/google/common/hooks/base_google.py +7 -28
  43. airflow/providers/google/get_provider_info.py +3 -1
  44. airflow/providers/google/marketing_platform/sensors/display_video.py +1 -1
  45. airflow/providers/google/suite/hooks/drive.py +2 -2
  46. {apache_airflow_providers_google-14.1.0rc1.dist-info → apache_airflow_providers_google-15.0.0rc1.dist-info}/METADATA +8 -6
  47. {apache_airflow_providers_google-14.1.0rc1.dist-info → apache_airflow_providers_google-15.0.0rc1.dist-info}/RECORD +49 -50
  48. airflow/providers/google/cloud/utils/mlengine_operator_utils.py +0 -273
  49. {apache_airflow_providers_google-14.1.0rc1.dist-info → apache_airflow_providers_google-15.0.0rc1.dist-info}/WHEEL +0 -0
  50. {apache_airflow_providers_google-14.1.0rc1.dist-info → apache_airflow_providers_google-15.0.0rc1.dist-info}/entry_points.txt +0 -0
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
  
  __all__ = ["__version__"]
  
- __version__ = "14.1.0"
+ __version__ = "15.0.0"
  
  if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
  "2.9.0"
@@ -19,7 +19,6 @@
  
  from __future__ import annotations
  
- import warnings
  from functools import cached_property
  from tempfile import NamedTemporaryFile
  from typing import IO, TYPE_CHECKING, Any, Literal
@@ -28,7 +27,7 @@ from google.ads.googleads.client import GoogleAdsClient
  from google.ads.googleads.errors import GoogleAdsException
  from google.auth.exceptions import GoogleAuthError
  
- from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+ from airflow.exceptions import AirflowException
  from airflow.hooks.base import BaseHook
  from airflow.providers.google.common.hooks.base_google import get_field
  
@@ -116,9 +115,7 @@ class GoogleAdsHook(BaseHook):
  self.google_ads_config: dict[str, Any] = {}
  self.authentication_method: Literal["service_account", "developer_token"] = "service_account"
  
- def search(
- self, client_ids: list[str], query: str, page_size: int | None = None, **kwargs
- ) -> list[GoogleAdsRow]:
+ def search(self, client_ids: list[str], query: str, **kwargs) -> list[GoogleAdsRow]:
  """
  Pull data from the Google Ads API.
  
@@ -134,18 +131,14 @@ class GoogleAdsHook(BaseHook):
  
  :param client_ids: Google Ads client ID(s) to query the API for.
  :param query: Google Ads Query Language query.
- :param page_size: Number of results to return per page. Max 10000 (for version 16 and 16.1)
- This parameter deprecated. After February 05, 2025, it will be removed.
  :return: Google Ads API response, converted to Google Ads Row objects.
  """
- data_proto_plus = self._search(client_ids, query, page_size, **kwargs)
+ data_proto_plus = self._search(client_ids, query, **kwargs)
  data_native_pb = [row._pb for row in data_proto_plus]
  
  return data_native_pb
  
- def search_proto_plus(
- self, client_ids: list[str], query: str, page_size: int | None = None, **kwargs
- ) -> list[GoogleAdsRow]:
+ def search_proto_plus(self, client_ids: list[str], query: str, **kwargs) -> list[GoogleAdsRow]:
  """
  Pull data from the Google Ads API.
  
@@ -154,11 +147,9 @@ class GoogleAdsHook(BaseHook):
  
  :param client_ids: Google Ads client ID(s) to query the API for.
  :param query: Google Ads Query Language query.
- :param page_size: Number of results to return per page. Max 10000 (for version 16 and 16.1)
- This parameter is deprecated. After February 05, 2025, it will be removed.
  :return: Google Ads API response, converted to Google Ads Row objects
  """
- return self._search(client_ids, query, page_size, **kwargs)
+ return self._search(client_ids, query, **kwargs)
  
  def list_accessible_customers(self) -> list[str]:
  """
@@ -269,37 +260,20 @@ class GoogleAdsHook(BaseHook):
  
  self.google_ads_config["json_key_file_path"] = secrets_temp.name
  
- def _search(
- self, client_ids: list[str], query: str, page_size: int | None = None, **kwargs
- ) -> list[GoogleAdsRow]:
+ def _search(self, client_ids: list[str], query: str, **kwargs) -> list[GoogleAdsRow]:
  """
  Pull data from the Google Ads API.
  
  :param client_ids: Google Ads client ID(s) to query the API for.
  :param query: Google Ads Query Language query.
- :param page_size: Number of results to return per page. Max 10000 (for version 16 and 16.1)
- This parameter is deprecated. After February 05, 2025, it will be removed.
  
  :return: Google Ads API response, converted to Google Ads Row objects
  """
  service = self._get_service
  
- extra_req_params = {}
- if self.api_version == "v16": # TODO: remove this after deprecation removal for page_size parameter
- extra_req_params["page_size"] = page_size or 10000
- else:
- if page_size:
- warnings.warn(
- "page_size parameter for the GoogleAdsHook.search and "
- "GoogleAdsHook.search_proto_plus method is deprecated and will be removed "
- "after February 05, 2025.",
- AirflowProviderDeprecationWarning,
- stacklevel=2,
- )
-
  iterators = []
  for client_id in client_ids:
- iterator = service.search(request={"customer_id": client_id, "query": query, **extra_req_params})
+ iterator = service.search(request={"customer_id": client_id, "query": query})
  iterators.append(iterator)
  
  self.log.info("Fetched Google Ads Iterators")
@@ -17,13 +17,11 @@
  from __future__ import annotations
  
  import csv
- import warnings
  from collections.abc import Sequence
  from operator import attrgetter
  from tempfile import NamedTemporaryFile
  from typing import TYPE_CHECKING
  
- from airflow.exceptions import AirflowProviderDeprecationWarning
  from airflow.models import BaseOperator
  from airflow.providers.google.ads.hooks.ads import GoogleAdsHook
  from airflow.providers.google.cloud.hooks.gcs import GCSHook
@@ -54,8 +52,6 @@ class GoogleAdsToGcsOperator(BaseOperator):
  :param obj: GCS path to save the object. Must be the full file path (ex. `path/to/file.txt`)
  :param gcp_conn_id: Airflow Google Cloud connection ID
  :param google_ads_conn_id: Airflow Google Ads connection ID
- :param page_size: The number of results per API page request. Max 10,000 (for version 16 and 16.1)
- This parameter deprecated. After March 01, 2025, it will be removed.
  :param gzip: Option to compress local file or file data for upload
  :param impersonation_chain: Optional service account to impersonate using short-term
  credentials, or chained list of accounts required to get the access_token
@@ -87,7 +83,6 @@ class GoogleAdsToGcsOperator(BaseOperator):
  obj: str,
  gcp_conn_id: str = "google_cloud_default",
  google_ads_conn_id: str = "google_ads_default",
- page_size: int | None = None,
  gzip: bool = False,
  impersonation_chain: str | Sequence[str] | None = None,
  api_version: str | None = None,
@@ -101,8 +96,6 @@ class GoogleAdsToGcsOperator(BaseOperator):
  self.obj = obj
  self.gcp_conn_id = gcp_conn_id
  self.google_ads_conn_id = google_ads_conn_id
- # TODO: remove this after deprecation removal for page_size parameter
- self.page_size = page_size or 10000 if api_version == "v16" else None
  self.gzip = gzip
  self.impersonation_chain = impersonation_chain
  self.api_version = api_version
@@ -114,16 +107,7 @@ class GoogleAdsToGcsOperator(BaseOperator):
  api_version=self.api_version,
  )
  
- if self.api_version != "v16" and self.page_size:
- warnings.warn(
- "page_size parameter for the GoogleAdsToGcsOperator is deprecated and will be removed "
- "after March 01, 2025.",
- AirflowProviderDeprecationWarning,
- stacklevel=2,
- )
- rows = service.search(client_ids=self.client_ids, query=self.query)
- else:
- rows = service.search(client_ids=self.client_ids, query=self.query, page_size=self.page_size)
+ rows = service.search(client_ids=self.client_ids, query=self.query)
  
  try:
  getter = attrgetter(*self.attributes)
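
The operator mirrors the hook change and no longer accepts page_size. A hedged DAG-level sketch with placeholder customer ID, bucket, object path and query:

    from airflow.providers.google.ads.transfers.ads_to_gcs import GoogleAdsToGcsOperator

    upload_campaigns = GoogleAdsToGcsOperator(
        task_id="ads_to_gcs",
        client_ids=["1234567890"],               # placeholder customer ID
        query="SELECT campaign.id FROM campaign",
        attributes=["campaign.id"],
        bucket="my-bucket",                      # placeholder bucket
        obj="ads/campaigns.csv",                 # placeholder object path
        gcp_conn_id="google_cloud_default",
        google_ads_conn_id="google_ads_default",
    )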
@@ -56,7 +56,6 @@ from google.cloud.exceptions import NotFound
  from googleapiclient.discovery import build
  from pandas_gbq import read_gbq
  from pandas_gbq.gbq import GbqConnector # noqa: F401 used in ``airflow.contrib.hooks.bigquery``
- from requests import Session
  from sqlalchemy import create_engine
  
  from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -80,6 +79,7 @@ if TYPE_CHECKING:
  import pandas as pd
  from google.api_core.page_iterator import HTTPIterator
  from google.api_core.retry import Retry
+ from requests import Session
  
  log = logging.getLogger(__name__)
  
@@ -2116,7 +2116,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
  job_id=job_id,
  project=project_id,
  token=token,
- session=cast(Session, session),
+ session=cast("Session", session),
  )
  
  async def _get_job(
@@ -2181,7 +2181,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
  async with ClientSession() as session:
  self.log.info("Executing get_job_output..")
  job_client = await self.get_job_instance(project_id, job_id, session)
- job_query_response = await job_client.get_query_results(cast(Session, session))
+ job_query_response = await job_client.get_query_results(cast("Session", session))
  return job_query_response
  
  async def create_job_for_partition_get(
@@ -2201,7 +2201,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
  + (f" WHERE table_name='{table_id}'" if table_id else ""),
  "useLegacySql": False,
  }
- job_query_resp = await job_client.query(query_request, cast(Session, session))
+ job_query_resp = await job_client.query(query_request, cast("Session", session))
  return job_query_resp["jobReference"]["jobId"]
  
  async def cancel_job(self, job_id: str, project_id: str | None, location: str | None) -> None:
@@ -2381,12 +2381,7 @@ class BigQueryAsyncHook(GoogleBaseAsyncHook):
  test_results[metric] = float(ratios[metric]) < threshold
  
  self.log.info(
- (
- "Current metric for %s: %s\n"
- "Past metric for %s: %s\n"
- "Ratio for %s: %s\n"
- "Threshold: %s\n"
- ),
+ ("Current metric for %s: %s\nPast metric for %s: %s\nRatio for %s: %s\nThreshold: %s\n"),
  metric,
  cur,
  metric,
@@ -2451,5 +2446,5 @@ class BigQueryTableAsyncHook(GoogleBaseAsyncHook):
  table_name=table_id,
  project=project_id,
  token=token,
- session=cast(Session, session),
+ session=cast("Session", session),
  )
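
Across these hooks, requests.Session is now imported only under TYPE_CHECKING and cast() receives the type as a string, so the runtime import can be dropped. A minimal sketch of the pattern, independent of the provider code:

    from __future__ import annotations

    from typing import TYPE_CHECKING, cast

    if TYPE_CHECKING:
        from requests import Session  # seen only by type checkers, never imported at runtime


    def with_session(session: object) -> "Session":
        # cast() accepts the target type as a string, so no runtime reference
        # to Session is needed; the value itself is returned unchanged.
        return cast("Session", session)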
@@ -155,8 +155,7 @@ class CloudBatchHook(GoogleBaseHook):
  raise AirflowException(message)
  elif status == JobStatus.State.DELETION_IN_PROGRESS:
  message = (
- "Unexpected error in the operation: "
- "Batch job with name {job_name} is being deleted."
+ "Unexpected error in the operation: Batch job with name {job_name} is being deleted."
  )
  raise AirflowException(message)
  else:
@@ -27,9 +27,8 @@ from google.api_core.exceptions import AlreadyExists
  from google.api_core.gapic_v1.method import DEFAULT, _MethodDefault
  from google.cloud.devtools.cloudbuild_v1 import CloudBuildAsyncClient, CloudBuildClient, GetBuildRequest
  
- from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+ from airflow.exceptions import AirflowException
  from airflow.providers.google.common.consts import CLIENT_INFO
- from airflow.providers.google.common.deprecated import deprecated
  from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
  from airflow.providers.google.common.hooks.operation_helpers import OperationHelper
  
@@ -178,58 +177,6 @@ class CloudBuildHook(GoogleBaseHook, OperationHelper):
  
  return operation, id_
  
- @GoogleBaseHook.fallback_to_default_project_id
- @deprecated(
- planned_removal_date="March 01, 2025",
- use_instead="create_build_without_waiting_for_result",
- category=AirflowProviderDeprecationWarning,
- )
- def create_build(
- self,
- build: dict | Build,
- project_id: str = PROVIDE_PROJECT_ID,
- wait: bool = True,
- retry: Retry | _MethodDefault = DEFAULT,
- timeout: float | None = None,
- metadata: Sequence[tuple[str, str]] = (),
- ) -> Build:
- """
- Start a build with the specified configuration.
-
- :param build: The build resource to create. If a dict is provided, it must be of the same form
- as the protobuf message `google.cloud.devtools.cloudbuild_v1.types.Build`
- :param project_id: Optional, Google Cloud Project project_id where the function belongs.
- If set to None or missing, the default project_id from the GCP connection is used.
- :param wait: Optional, wait for operation to finish.
- :param retry: Optional, a retry object used to retry requests. If `None` is specified, requests
- will not be retried.
- :param timeout: Optional, the amount of time, in seconds, to wait for the request to complete.
- Note that if `retry` is specified, the timeout applies to each individual attempt.
- :param metadata: Optional, additional metadata that is provided to the method.
-
- """
- client = self.get_conn()
-
- self.log.info("Start creating build...")
-
- operation = client.create_build(
- request={"project_id": project_id, "build": build},
- retry=retry,
- timeout=timeout,
- metadata=metadata,
- )
-
- id_ = self._get_build_id_from_operation(operation)
-
- if not wait:
- return self.get_build(id_=id_, project_id=project_id)
-
- operation.result()
-
- self.log.info("Build has been created: %s.", id_)
-
- return self.get_build(id_=id_, project_id=project_id)
-
  @GoogleBaseHook.fallback_to_default_project_id
  def create_build_trigger(
  self,
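
The deprecated create_build wrapper is removed; its deprecation notice pointed at create_build_without_waiting_for_result, which, as the surrounding context shows, returns the long-running operation together with the build id. A hedged sketch of the replacement flow, with a placeholder project ID and build body; the keyword names should be verified against the hook's current signature:

    from airflow.providers.google.cloud.hooks.cloud_build import CloudBuildHook

    hook = CloudBuildHook(gcp_conn_id="google_cloud_default")
    operation, build_id = hook.create_build_without_waiting_for_result(
        build={"steps": [{"name": "ubuntu", "args": ["echo", "hello"]}]},  # placeholder build body
        project_id="my-project",                                           # placeholder project
    )
    operation.result()  # block until the build finishes, if the old wait=True behaviour is needed
    build = hook.get_build(id_=build_id, project_id="my-project")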
@@ -29,6 +29,7 @@ from google.cloud.compute_v1.services.instances import InstancesClient
  from googleapiclient.discovery import build
  
  from airflow.exceptions import AirflowException
+ from airflow.providers.google.common.consts import CLIENT_INFO
  from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID, GoogleBaseHook
  
  if TYPE_CHECKING:
@@ -85,15 +86,15 @@ class ComputeEngineHook(GoogleBaseHook):
  
  def get_compute_instance_template_client(self):
  """Return Compute Engine Instance Template Client."""
- return InstanceTemplatesClient(credentials=self.get_credentials(), client_info=self.client_info)
+ return InstanceTemplatesClient(credentials=self.get_credentials(), client_info=CLIENT_INFO)
  
  def get_compute_instance_client(self):
  """Return Compute Engine Instance Client."""
- return InstancesClient(credentials=self.get_credentials(), client_info=self.client_info)
+ return InstancesClient(credentials=self.get_credentials(), client_info=CLIENT_INFO)
  
  def get_compute_instance_group_managers_client(self):
  """Return Compute Engine Instance Group Managers Client."""
- return InstanceGroupManagersClient(credentials=self.get_credentials(), client_info=self.client_info)
+ return InstanceGroupManagersClient(credentials=self.get_credentials(), client_info=CLIENT_INFO)
  
  @GoogleBaseHook.fallback_to_default_project_id
  def insert_instance_template(
@@ -51,7 +51,6 @@ from googleapiclient.discovery import Resource, build
  
  from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
  from airflow.providers.apache.beam.hooks.beam import BeamHook, BeamRunnerType, beam_options_to_args
- from airflow.providers.google.common.deprecated import deprecated
  from airflow.providers.google.common.hooks.base_google import (
  PROVIDE_PROJECT_ID,
  GoogleBaseAsyncHook,
@@ -136,7 +135,7 @@ def _fallback_variable_parameter(parameter_name: str, variable_key_name: str) ->
  
  return func(self, *args, **kwargs)
  
- return cast(T, inner_wrapper)
+ return cast("T", inner_wrapper)
  
  return _wrapper
  
@@ -586,66 +585,6 @@ class DataflowHook(GoogleBaseHook):
  http_authorized = self._authorize()
  return build("datapipelines", "v1", http=http_authorized, cache_discovery=False)
  
- @_fallback_to_location_from_variables
- @_fallback_to_project_id_from_variables
- @GoogleBaseHook.fallback_to_default_project_id
- @deprecated(
- planned_removal_date="March 01, 2025",
- use_instead="airflow.providers.apache.beam.hooks.beam.start.start_java_pipeline, "
- "providers.google.cloud.hooks.dataflow.DataflowHook.wait_for_done",
- instructions="Please use airflow.providers.apache.beam.hooks.beam.start.start_java_pipeline "
- "to start pipeline and providers.google.cloud.hooks.dataflow.DataflowHook.wait_for_done method "
- "to wait for the required pipeline state instead.",
- category=AirflowProviderDeprecationWarning,
- )
- def start_java_dataflow(
- self,
- job_name: str,
- variables: dict,
- jar: str,
- project_id: str,
- job_class: str | None = None,
- append_job_name: bool = True,
- multiple_jobs: bool = False,
- on_new_job_id_callback: Callable[[str], None] | None = None,
- location: str = DEFAULT_DATAFLOW_LOCATION,
- ) -> None:
- """
- Start Dataflow java job.
-
- :param job_name: The name of the job.
- :param variables: Variables passed to the job.
- :param project_id: Optional, the Google Cloud project ID in which to start a job.
- If set to None or missing, the default project_id from the Google Cloud connection is used.
- :param jar: Name of the jar for the job
- :param job_class: Name of the java class for the job.
- :param append_job_name: True if unique suffix has to be appended to job name.
- :param multiple_jobs: True if to check for multiple job in dataflow
- :param on_new_job_id_callback: Callback called when the job ID is known.
- :param location: Job location.
- """
- name = self.build_dataflow_job_name(job_name, append_job_name)
-
- variables["jobName"] = name
- variables["region"] = location
- variables["project"] = project_id
-
- if "labels" in variables:
- variables["labels"] = json.dumps(variables["labels"], separators=(",", ":"))
-
- self.beam_hook.start_java_pipeline(
- variables=variables,
- jar=jar,
- job_class=job_class,
- process_line_callback=process_line_and_extract_dataflow_job_id_callback(on_new_job_id_callback),
- )
- self.wait_for_done(
- job_name=name,
- location=location,
- job_id=self.job_id,
- multiple_jobs=multiple_jobs,
- )
-
  @_fallback_to_location_from_variables
  @_fallback_to_project_id_from_variables
  @GoogleBaseHook.fallback_to_default_project_id
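
start_java_dataflow is gone; the removed deprecation notice pointed at BeamHook.start_java_pipeline plus DataflowHook.wait_for_done. A sketch that mirrors the removed body, with placeholder project, region, jar and job class; keyword names should be double-checked against the current signatures:

    from airflow.providers.apache.beam.hooks.beam import BeamHook, BeamRunnerType
    from airflow.providers.google.cloud.hooks.dataflow import DataflowHook

    dataflow_hook = DataflowHook(gcp_conn_id="google_cloud_default")
    beam_hook = BeamHook(runner=BeamRunnerType.DataflowRunner)

    job_name = dataflow_hook.build_dataflow_job_name("example-job")  # appends a unique suffix
    variables = {"jobName": job_name, "region": "us-central1", "project": "my-project"}

    beam_hook.start_java_pipeline(
        variables=variables,
        jar="/path/to/pipeline.jar",       # placeholder jar
        job_class="com.example.Pipeline",  # placeholder main class
    )
    dataflow_hook.wait_for_done(job_name=job_name, location="us-central1")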
@@ -1027,82 +966,6 @@ class DataflowHook(GoogleBaseHook):
  "While reading job object after template execution error occurred. Job object has no id."
  )
  
- @_fallback_to_location_from_variables
- @_fallback_to_project_id_from_variables
- @GoogleBaseHook.fallback_to_default_project_id
- @deprecated(
- planned_removal_date="March 01, 2025",
- use_instead="airflow.providers.apache.beam.hooks.beam.start.start_python_pipeline method, "
- "providers.google.cloud.hooks.dataflow.DataflowHook.wait_for_done",
- instructions="Please use airflow.providers.apache.beam.hooks.beam.start.start_python_pipeline method "
- "to start pipeline and providers.google.cloud.hooks.dataflow.DataflowHook.wait_for_done method "
- "to wait for the required pipeline state instead.",
- category=AirflowProviderDeprecationWarning,
- )
- def start_python_dataflow(
- self,
- job_name: str,
- variables: dict,
- dataflow: str,
- py_options: list[str],
- project_id: str,
- py_interpreter: str = "python3",
- py_requirements: list[str] | None = None,
- py_system_site_packages: bool = False,
- append_job_name: bool = True,
- on_new_job_id_callback: Callable[[str], None] | None = None,
- location: str = DEFAULT_DATAFLOW_LOCATION,
- ):
- """
- Start Dataflow job.
-
- :param job_name: The name of the job.
- :param variables: Variables passed to the job.
- :param dataflow: Name of the Dataflow process.
- :param py_options: Additional options.
- :param project_id: The ID of the GCP project that owns the job.
- If set to ``None`` or missing, the default project_id from the GCP connection is used.
- :param py_interpreter: Python version of the beam pipeline.
- If None, this defaults to the python3.
- To track python versions supported by beam and related
- issues check: https://issues.apache.org/jira/browse/BEAM-1251
- :param py_requirements: Additional python package(s) to install.
- If a value is passed to this parameter, a new virtual environment has been created with
- additional packages installed.
-
- You could also install the apache-beam package if it is not installed on your system or you want
- to use a different version.
- :param py_system_site_packages: Whether to include system_site_packages in your virtualenv.
- See virtualenv documentation for more information.
-
- This option is only relevant if the ``py_requirements`` parameter is not None.
- :param append_job_name: True if unique suffix has to be appended to job name.
- :param project_id: Optional, the Google Cloud project ID in which to start a job.
- If set to None or missing, the default project_id from the Google Cloud connection is used.
- :param on_new_job_id_callback: Callback called when the job ID is known.
- :param location: Job location.
- """
- name = self.build_dataflow_job_name(job_name, append_job_name)
- variables["job_name"] = name
- variables["region"] = location
- variables["project"] = project_id
-
- self.beam_hook.start_python_pipeline(
- variables=variables,
- py_file=dataflow,
- py_options=py_options,
- py_interpreter=py_interpreter,
- py_requirements=py_requirements,
- py_system_site_packages=py_system_site_packages,
- process_line_callback=process_line_and_extract_dataflow_job_id_callback(on_new_job_id_callback),
- )
-
- self.wait_for_done(
- job_name=name,
- location=location,
- job_id=self.job_id,
- )
-
  @staticmethod
  def build_dataflow_job_name(job_name: str, append_job_name: bool = True) -> str:
  """Build Dataflow job name."""
@@ -1271,7 +1134,7 @@ class DataflowHook(GoogleBaseHook):
  AirflowProviderDeprecationWarning,
  stacklevel=3,
  )
- on_new_job_id_callback(cast(str, job.get("id")))
+ on_new_job_id_callback(cast("str", job.get("id")))
  
  if on_new_job_callback:
  on_new_job_callback(job)
@@ -453,8 +453,7 @@ class DataformHook(GoogleBaseHook):
  """
  client = self.get_dataform_client()
  workspace_path = (
- f"projects/{project_id}/locations/{region}/"
- f"repositories/{repository_id}/workspaces/{workspace_id}"
+ f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
  )
  request = {
  "name": workspace_path,
@@ -496,8 +495,7 @@ class DataformHook(GoogleBaseHook):
  """
  client = self.get_dataform_client()
  workspace_path = (
- f"projects/{project_id}/locations/{region}/"
- f"repositories/{repository_id}/workspaces/{workspace_id}"
+ f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
  )
  request = {
  "workspace": workspace_path,
@@ -542,8 +540,7 @@ class DataformHook(GoogleBaseHook):
  """
  client = self.get_dataform_client()
  workspace_path = (
- f"projects/{project_id}/locations/{region}/"
- f"repositories/{repository_id}/workspaces/{workspace_id}"
+ f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
  )
  request = {
  "workspace": workspace_path,
@@ -587,8 +584,7 @@ class DataformHook(GoogleBaseHook):
  """
  client = self.get_dataform_client()
  workspace_path = (
- f"projects/{project_id}/locations/{region}/"
- f"repositories/{repository_id}/workspaces/{workspace_id}"
+ f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
  )
  request = {
  "workspace": workspace_path,
@@ -629,8 +625,7 @@ class DataformHook(GoogleBaseHook):
  """
  client = self.get_dataform_client()
  workspace_path = (
- f"projects/{project_id}/locations/{region}/"
- f"repositories/{repository_id}/workspaces/{workspace_id}"
+ f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
  )
  request = {
  "workspace": workspace_path,
@@ -671,8 +666,7 @@ class DataformHook(GoogleBaseHook):
  """
  client = self.get_dataform_client()
  workspace_path = (
- f"projects/{project_id}/locations/{region}/"
- f"repositories/{repository_id}/workspaces/{workspace_id}"
+ f"projects/{project_id}/locations/{region}/repositories/{repository_id}/workspaces/{workspace_id}"
  )
  request = {
  "workspace": workspace_path,
@@ -137,8 +137,7 @@ class DataFusionHook(GoogleBaseHook):
  
  # Time is up!
  raise AirflowException(
- f"Pipeline {pipeline_name} state {current_state} is not "
- f"one of {success_states} after {timeout}s"
+ f"Pipeline {pipeline_name} state {current_state} is not one of {success_states} after {timeout}s"
  )
  
  @staticmethod
@@ -137,7 +137,7 @@ class DataplexHook(GoogleBaseHook, OperationHelper):
  credentials=self.get_credentials(), client_info=CLIENT_INFO, client_options=client_options
  )
  
- def wait_for_operation(self, timeout: float | None, operation: Operation):
+ def wait_for_operation(self, operation: Operation, timeout: float | None = None):
  """Wait for long-lasting operation to complete."""
  try:
  return operation.result(timeout=timeout)
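
wait_for_operation now takes the operation first and makes timeout optional, so positional callers written against the old (timeout, operation) order would silently swap the arguments. Calling with keywords, as in this small sketch, works with either signature:

    # assuming `hook` is a DataplexHook and `operation` is a google.api_core Operation
    result = hook.wait_for_operation(operation=operation, timeout=300)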
@@ -41,7 +41,6 @@ from google.api_core.exceptions import GoogleAPICallError, NotFound
  from google.cloud import storage # type: ignore[attr-defined]
  from google.cloud.exceptions import GoogleCloudError
  from google.cloud.storage.retry import DEFAULT_RETRY
- from requests import Session
  
  from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
  from airflow.providers.common.compat.lineage.hook import get_hook_lineage_collector
@@ -62,6 +61,7 @@ if TYPE_CHECKING:
  from aiohttp import ClientSession
  from google.api_core.retry import Retry
  from google.cloud.storage.blob import Blob
+ from requests import Session
  
  
  RT = TypeVar("RT")
@@ -135,16 +135,16 @@ def _fallback_object_url_to_object_name_and_bucket_name(
  
  return func(self, *args, **kwargs)
  
- return cast(Callable[FParams, RT], _inner_wrapper)
+ return cast("Callable[FParams, RT]", _inner_wrapper)
  
- return cast(Callable[[T], T], _wrapper)
+ return cast("Callable[[T], T]", _wrapper)
  
  
  # A fake bucket to use in functions decorated by _fallback_object_url_to_object_name_and_bucket_name.
  # This allows the 'bucket' argument to be of type str instead of str | None,
  # making it easier to type hint the function body without dealing with the None
  # case that can never happen at runtime.
- PROVIDE_BUCKET: str = cast(str, None)
+ PROVIDE_BUCKET: str = cast("str", None)
  
  
  class GCSHook(GoogleBaseHook):
@@ -726,6 +726,14 @@ class GCSHook(GoogleBaseHook):
  
  self.log.info("Blob %s deleted.", object_name)
  
+ def get_bucket(self, bucket_name: str) -> storage.Bucket:
+ """
+ Get a bucket object from the Google Cloud Storage.
+
+ :param bucket_name: name of the bucket
+ """
+ return self.get_conn().bucket(bucket_name)
+
  def delete_bucket(self, bucket_name: str, force: bool = False, user_project: str | None = None) -> None:
  """
  Delete a bucket object from the Google Cloud Storage.
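
The new get_bucket helper simply wraps self.get_conn().bucket(bucket_name) and returns a google.cloud.storage Bucket handle. A small usage sketch, with placeholder bucket and object names:

    from airflow.providers.google.cloud.hooks.gcs import GCSHook

    hook = GCSHook(gcp_conn_id="google_cloud_default")
    bucket = hook.get_bucket("my-bucket")       # placeholder bucket name
    blob = bucket.blob("path/to/file.txt")      # placeholder object path
    print(bucket.name, blob.exists())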
@@ -1494,5 +1502,5 @@ class GCSAsyncHook(GoogleBaseAsyncHook):
  token = await self.get_token(session=session)
  return Storage(
  token=token,
- session=cast(Session, session),
+ session=cast("Session", session),
  )
@@ -34,7 +34,7 @@ TIME_TO_SLEEP_IN_SECONDS = 5
  
  
  @deprecated(
- planned_removal_date="March 01, 2025",
+ planned_removal_date="July 08, 2025",
  use_instead="Google Cloud Batch Operators' hook",
  reason="The Life Sciences API (beta) will be discontinued on July 8, 2025 "
  "in favor of Google Cloud Batch.",