apache-airflow-providers-google 10.12.0__py3-none-any.whl → 10.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. airflow/providers/google/__init__.py +3 -3
  2. airflow/providers/google/cloud/fs/gcs.py +16 -13
  3. airflow/providers/google/cloud/hooks/bigquery_dts.py +2 -1
  4. airflow/providers/google/cloud/hooks/cloud_build.py +2 -1
  5. airflow/providers/google/cloud/hooks/cloud_composer.py +4 -3
  6. airflow/providers/google/cloud/hooks/compute_ssh.py +18 -6
  7. airflow/providers/google/cloud/hooks/dataflow.py +61 -3
  8. airflow/providers/google/cloud/hooks/dataplex.py +2 -1
  9. airflow/providers/google/cloud/hooks/dataproc.py +19 -18
  10. airflow/providers/google/cloud/hooks/gcs.py +10 -6
  11. airflow/providers/google/cloud/hooks/pubsub.py +3 -2
  12. airflow/providers/google/cloud/log/gcs_task_handler.py +2 -39
  13. airflow/providers/google/cloud/log/stackdriver_task_handler.py +2 -11
  14. airflow/providers/google/cloud/operators/bigquery.py +47 -47
  15. airflow/providers/google/cloud/operators/cloud_composer.py +1 -1
  16. airflow/providers/google/cloud/operators/cloud_run.py +3 -3
  17. airflow/providers/google/cloud/operators/dataflow.py +6 -0
  18. airflow/providers/google/cloud/operators/dataplex.py +530 -1
  19. airflow/providers/google/cloud/operators/dataproc.py +11 -11
  20. airflow/providers/google/cloud/operators/gcs.py +90 -15
  21. airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -3
  22. airflow/providers/google/cloud/operators/pubsub.py +47 -55
  23. airflow/providers/google/cloud/secrets/secret_manager.py +22 -1
  24. airflow/providers/google/cloud/sensors/cloud_composer.py +14 -1
  25. airflow/providers/google/cloud/sensors/dataplex.py +118 -0
  26. airflow/providers/google/cloud/sensors/gcs.py +10 -1
  27. airflow/providers/google/cloud/transfers/adls_to_gcs.py +5 -5
  28. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +42 -42
  29. airflow/providers/google/cloud/transfers/mssql_to_gcs.py +9 -9
  30. airflow/providers/google/cloud/triggers/cloud_run.py +7 -7
  31. airflow/providers/google/cloud/triggers/dataplex.py +82 -0
  32. airflow/providers/google/cloud/triggers/dataproc.py +2 -5
  33. airflow/providers/google/cloud/triggers/gcs.py +13 -3
  34. airflow/providers/google/cloud/triggers/kubernetes_engine.py +3 -1
  35. airflow/providers/google/common/hooks/base_google.py +6 -4
  36. airflow/providers/google/get_provider_info.py +14 -13
  37. {apache_airflow_providers_google-10.12.0.dist-info → apache_airflow_providers_google-10.13.0.dist-info}/METADATA +30 -30
  38. {apache_airflow_providers_google-10.12.0.dist-info → apache_airflow_providers_google-10.13.0.dist-info}/RECORD +40 -40
  39. {apache_airflow_providers_google-10.12.0.dist-info → apache_airflow_providers_google-10.13.0.dist-info}/WHEEL +0 -0
  40. {apache_airflow_providers_google-10.12.0.dist-info → apache_airflow_providers_google-10.13.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/__init__.py
@@ -27,7 +27,7 @@ import packaging.version
 
 __all__ = ["__version__"]
 
-__version__ = "10.12.0"
+__version__ = "10.13.0"
 
 try:
     from airflow import __version__ as airflow_version
@@ -35,8 +35,8 @@ except ImportError:
     from airflow.version import version as airflow_version
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
-    "2.5.0"
+    "2.6.0"
 ):
     raise RuntimeError(
-        f"The package `apache-airflow-providers-google:{__version__}` needs Apache Airflow 2.5.0+"
+        f"The package `apache-airflow-providers-google:{__version__}` needs Apache Airflow 2.6.0+"
     )

airflow/providers/google/cloud/fs/gcs.py
@@ -39,7 +39,7 @@ GCS_VERSION_AWARE = "gcs.version-aware"
 schemes = ["gs", "gcs"]
 
 
-def get_fs(conn_id: str | None) -> AbstractFileSystem:
+def get_fs(conn_id: str | None, storage_options: dict[str, str] | None = None) -> AbstractFileSystem:
     # https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem
     from gcsfs import GCSFileSystem
 
@@ -49,15 +49,18 @@ def get_fs(conn_id: str | None) -> AbstractFileSystem:
     g = GoogleBaseHook(gcp_conn_id=conn_id)
     creds = g.get_credentials()
 
-    return GCSFileSystem(
-        project=g.project_id,
-        access=g.extras.get(GCS_ACCESS, "full_control"),
-        token=creds.token,
-        consistency=g.extras.get(GCS_CONSISTENCY, "none"),
-        cache_timeout=g.extras.get(GCS_CACHE_TIMEOUT),
-        requester_pays=g.extras.get(GCS_REQUESTER_PAYS, False),
-        session_kwargs=g.extras.get(GCS_SESSION_KWARGS, {}),
-        endpoint_url=g.extras.get(GCS_ENDPOINT),
-        default_location=g.extras.get(GCS_DEFAULT_LOCATION),
-        version_aware=g.extras.get(GCS_VERSION_AWARE, "false").lower() == "true",
-    )
+    options = {
+        "project": g.project_id,
+        "access": g.extras.get(GCS_ACCESS, "full_control"),
+        "token": creds.token,
+        "consistency": g.extras.get(GCS_CONSISTENCY, "none"),
+        "cache_timeout": g.extras.get(GCS_CACHE_TIMEOUT),
+        "requester_pays": g.extras.get(GCS_REQUESTER_PAYS, False),
+        "session_kwargs": g.extras.get(GCS_SESSION_KWARGS, {}),
+        "endpoint_url": g.extras.get(GCS_ENDPOINT),
+        "default_location": g.extras.get(GCS_DEFAULT_LOCATION),
+        "version_aware": g.extras.get(GCS_VERSION_AWARE, "false").lower() == "true",
+    }
+    options.update(storage_options or {})
+
+    return GCSFileSystem(**options)

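The reworked `get_fs` now assembles its keyword arguments into a dict so that a caller-supplied `storage_options` mapping can override whatever was derived from the connection extras before `GCSFileSystem` is constructed. A minimal, hedged sketch of how the new parameter could be exercised; the override value and bucket name below are illustrative placeholders, not values from this release:

    from airflow.providers.google.cloud.fs.gcs import get_fs

    # Keys passed via storage_options win over the same keys derived from the
    # Airflow connection extras (endpoint_url, requester_pays, ...).
    fs = get_fs(
        conn_id="google_cloud_default",
        storage_options={"requester_pays": True},  # hypothetical override
    )
    print(fs.ls("my-example-bucket"))  # placeholder bucket name
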
airflow/providers/google/cloud/hooks/bigquery_dts.py
@@ -38,6 +38,7 @@ from airflow.providers.google.common.hooks.base_google import (
 
 if TYPE_CHECKING:
     from google.api_core.retry import Retry
+    from google.api_core.retry_async import AsyncRetry
     from googleapiclient.discovery import Resource
 
 
@@ -321,7 +322,7 @@ class AsyncBiqQueryDataTransferServiceHook(GoogleBaseAsyncHook):
         run_id: str,
         project_id: str | None,
         location: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ):

airflow/providers/google/cloud/hooks/cloud_build.py
@@ -33,6 +33,7 @@ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 if TYPE_CHECKING:
     from google.api_core.operation import Operation
     from google.api_core.retry import Retry
+    from google.api_core.retry_async import AsyncRetry
     from google.cloud.devtools.cloudbuild_v1.types import Build, BuildTrigger, RepoSource
 
 # Time to sleep between active checks of the operation results
@@ -645,7 +646,7 @@ class CloudBuildAsyncHook(GoogleBaseHook):
         self,
         id_: str,
         project_id: str = PROVIDE_PROJECT_ID,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
         location: str = "global",

airflow/providers/google/cloud/hooks/cloud_composer.py
@@ -35,6 +35,7 @@ if TYPE_CHECKING:
     from google.api_core.operation import Operation
     from google.api_core.operation_async import AsyncOperation
     from google.api_core.retry import Retry
+    from google.api_core.retry_async import AsyncRetry
     from google.cloud.orchestration.airflow.service_v1.services.environments.pagers import (
         ListEnvironmentsPager,
     )
@@ -332,7 +333,7 @@ class CloudComposerAsyncHook(GoogleBaseHook):
         project_id: str,
         region: str,
         environment: Environment | dict,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -361,7 +362,7 @@ class CloudComposerAsyncHook(GoogleBaseHook):
         project_id: str,
         region: str,
         environment_id: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -389,7 +390,7 @@ class CloudComposerAsyncHook(GoogleBaseHook):
         environment_id: str,
         environment: Environment | dict,
         update_mask: dict | FieldMask,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:

airflow/providers/google/cloud/hooks/compute_ssh.py
@@ -86,6 +86,9 @@ class ComputeEngineSSHHook(SSHHook):
     :param gcp_conn_id: The connection id to use when fetching connection information
     :param max_retries: Maximum number of retries the process will try to establish connection to instance.
         Could be decreased/increased by user based on the amount of parallel SSH connections to the instance.
+    :param impersonation_chain: Optional. The service account email to impersonate using short-term
+        credentials. The provided service account must grant the originating account
+        the Service Account Token Creator IAM role and have the sufficient rights to perform the request
     """
 
     conn_name_attr = "gcp_conn_id"
@@ -93,8 +96,8 @@ class ComputeEngineSSHHook(SSHHook):
     conn_type = "gcpssh"
     hook_name = "Google Cloud SSH"
 
-    @staticmethod
-    def get_ui_field_behaviour() -> dict[str, Any]:
+    @classmethod
+    def get_ui_field_behaviour(cls) -> dict[str, Any]:
         return {
             "hidden_fields": ["host", "schema", "login", "password", "port", "extra"],
             "relabeling": {},
@@ -114,15 +117,17 @@ class ComputeEngineSSHHook(SSHHook):
         expire_time: int = 300,
         cmd_timeout: int | ArgNotSet = NOTSET,
         max_retries: int = 10,
+        impersonation_chain: str | None = None,
         **kwargs,
     ) -> None:
         if kwargs.get("delegate_to") is not None:
             raise RuntimeError(
                 "The `delegate_to` parameter has been deprecated before and finally removed in this version"
-                " of Google Provider. You MUST convert it to `impersonate_chain`"
+                " of Google Provider. You MUST convert it to `impersonation_chain`"
             )
         # Ignore original constructor
         # super().__init__()
+        self.gcp_conn_id = gcp_conn_id
         self.instance_name = instance_name
         self.zone = zone
         self.user = user
@@ -132,9 +137,9 @@ class ComputeEngineSSHHook(SSHHook):
         self.use_iap_tunnel = use_iap_tunnel
         self.use_oslogin = use_oslogin
         self.expire_time = expire_time
-        self.gcp_conn_id = gcp_conn_id
         self.cmd_timeout = cmd_timeout
         self.max_retries = max_retries
+        self.impersonation_chain = impersonation_chain
         self._conn: Any | None = None
 
     @cached_property
@@ -143,7 +148,12 @@ class ComputeEngineSSHHook(SSHHook):
 
     @cached_property
     def _compute_hook(self) -> ComputeEngineHook:
-        return ComputeEngineHook(gcp_conn_id=self.gcp_conn_id)
+        if self.impersonation_chain:
+            return ComputeEngineHook(
+                gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
+            )
+        else:
+            return ComputeEngineHook(gcp_conn_id=self.gcp_conn_id)
 
     def _load_connection_config(self):
         def _boolify(value):
@@ -254,6 +264,8 @@ class ComputeEngineSSHHook(SSHHook):
                     f"--zone={self.zone}",
                     "--verbosity=warning",
                 ]
+                if self.impersonation_chain:
+                    proxy_command_args.append(f"--impersonate-service-account={self.impersonation_chain}")
                 proxy_command = " ".join(shlex.quote(arg) for arg in proxy_command_args)
             sshclient = self._connect_to_instance(user, hostname, privkey, proxy_command)
             break
@@ -283,7 +295,7 @@ class ComputeEngineSSHHook(SSHHook):
         client = _GCloudAuthorizedSSHClient(self._compute_hook)
         # Default is RejectPolicy
         # No known host checking since we are not storing privatekey
-        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # nosec B507
         client.connect(
             hostname=hostname,
             username=user,

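The new `impersonation_chain` parameter on `ComputeEngineSSHHook` (a single service-account email in this hook) is forwarded to the underlying `ComputeEngineHook` and, when an IAP tunnel is used, to `gcloud` via `--impersonate-service-account`. A hedged usage sketch; the instance, zone, and service-account values are placeholders:

    from airflow.providers.google.cloud.hooks.compute_ssh import ComputeEngineSSHHook

    hook = ComputeEngineSSHHook(
        gcp_conn_id="google_cloud_default",
        instance_name="my-vm",            # placeholder instance
        zone="europe-west1-b",            # placeholder zone
        use_iap_tunnel=True,
        impersonation_chain="airflow-ssh@my-project.iam.gserviceaccount.com",  # placeholder SA
    )
    client = hook.get_conn()  # paramiko SSHClient
    _, stdout, _ = client.exec_command("hostname")
    print(stdout.read().decode())
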
airflow/providers/google/cloud/hooks/dataflow.py
@@ -27,9 +27,10 @@ import time
 import uuid
 import warnings
 from copy import deepcopy
-from typing import Any, Callable, Generator, Sequence, TypeVar, cast
+from typing import TYPE_CHECKING, Any, Callable, Generator, Sequence, TypeVar, cast
 
 from google.cloud.dataflow_v1beta3 import GetJobRequest, Job, JobState, JobsV1Beta3AsyncClient, JobView
+from google.cloud.dataflow_v1beta3.types.jobs import ListJobsRequest
 from googleapiclient.discovery import build
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -42,6 +43,10 @@ from airflow.providers.google.common.hooks.base_google import (
 from airflow.utils.log.logging_mixin import LoggingMixin
 from airflow.utils.timeout import timeout
 
+if TYPE_CHECKING:
+    from google.cloud.dataflow_v1beta3.services.jobs_v1_beta3.pagers import ListJobsAsyncPager
+
+
 # This is the default location
 # https://cloud.google.com/dataflow/pipelines/specifying-exec-params
 DEFAULT_DATAFLOW_LOCATION = "us-central1"
@@ -55,7 +60,7 @@ T = TypeVar("T", bound=Callable)
 
 
 def process_line_and_extract_dataflow_job_id_callback(
-    on_new_job_id_callback: Callable[[str], None] | None
+    on_new_job_id_callback: Callable[[str], None] | None,
 ) -> Callable[[str], None]:
     """Build callback that triggers the specified function.
 
@@ -219,7 +224,7 @@ class _DataflowJobsController(LoggingMixin):
 
     def is_job_running(self) -> bool:
         """
-        Helper method to check if jos is still running in dataflow.
+        Helper method to check if job is still running in dataflow.
 
         :return: True if job is running.
         """
@@ -1203,6 +1208,24 @@ class DataflowHook(GoogleBaseHook):
         )
         job_controller.wait_for_done()
 
+    @GoogleBaseHook.fallback_to_default_project_id
+    def is_job_done(self, location: str, project_id: str, job_id: str) -> bool:
+        """
+        Check that Dataflow job is started(for streaming job) or finished(for batch job).
+
+        :param location: location the job is running
+        :param project_id: Google Cloud project ID in which to start a job
+        :param job_id: Dataflow job ID
+        """
+        job_controller = _DataflowJobsController(
+            dataflow=self.get_conn(),
+            project_number=project_id,
+            location=location,
+        )
+        job = job_controller.fetch_job_by_id(job_id)
+
+        return job_controller._check_dataflow_job_state(job)
+
 
 class AsyncDataflowHook(GoogleBaseAsyncHook):
     """Async hook class for dataflow service."""
@@ -1295,3 +1318,38 @@ class AsyncDataflowHook(GoogleBaseAsyncHook):
         )
         state = job.current_state
         return state
+
+    async def list_jobs(
+        self,
+        jobs_filter: int | None = None,
+        project_id: str | None = PROVIDE_PROJECT_ID,
+        location: str | None = DEFAULT_DATAFLOW_LOCATION,
+        page_size: int | None = None,
+        page_token: str | None = None,
+    ) -> ListJobsAsyncPager:
+        """List jobs.
+
+        For detail see:
+        https://cloud.google.com/python/docs/reference/dataflow/latest/google.cloud.dataflow_v1beta3.types.ListJobsRequest
+
+        :param jobs_filter: Optional. This field filters out and returns jobs in the specified job state.
+        :param project_id: Optional. The Google Cloud project ID in which to start a job.
+            If set to None or missing, the default project_id from the Google Cloud connection is used.
+        :param location: Optional. The location of the Dataflow job (for example europe-west1).
+        :param page_size: Optional. If there are many jobs, limit response to at most this many.
+        :param page_token: Optional. Set this to the 'next_page_token' field of a previous response to request
+            additional results in a long list.
+        """
+        project_id = project_id or (await self.get_project_id())
+        client = await self.initialize_client(JobsV1Beta3AsyncClient)
+        request: ListJobsRequest = ListJobsRequest(
+            {
+                "project_id": project_id,
+                "location": location,
+                "filter": jobs_filter,
+                "page_size": page_size,
+                "page_token": page_token,
+            }
+        )
+        page_result: ListJobsAsyncPager = await client.list_jobs(request=request)
+        return page_result

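Two functional additions here: `DataflowHook.is_job_done` wraps `_DataflowJobsController` to report whether a batch job has finished (or a streaming job has started), and `AsyncDataflowHook.list_jobs` exposes the v1beta3 `ListJobs` RPC through a `ListJobsAsyncPager`. A sketch of both, assuming a default Google Cloud connection; project, region, and job IDs are placeholders:

    import asyncio

    from google.cloud.dataflow_v1beta3 import ListJobsRequest

    from airflow.providers.google.cloud.hooks.dataflow import AsyncDataflowHook, DataflowHook

    # Synchronous: has this job reached its expected state?
    hook = DataflowHook(gcp_conn_id="google_cloud_default")
    done = hook.is_job_done(
        location="us-central1",
        project_id="my-project",                 # placeholder project
        job_id="2023-12-01_00_00_00-123456789",  # placeholder job ID
    )
    print("job done:", done)

    # Asynchronous: page through active jobs with the new list_jobs() helper.
    async def show_active_jobs():
        async_hook = AsyncDataflowHook(gcp_conn_id="google_cloud_default")
        pager = await async_hook.list_jobs(
            jobs_filter=ListJobsRequest.Filter.ACTIVE,
            project_id="my-project",
            location="us-central1",
        )
        async for job in pager:
            print(job.id, job.name, job.current_state)

    asyncio.run(show_active_jobs())
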
airflow/providers/google/cloud/hooks/dataplex.py
@@ -40,6 +40,7 @@ from airflow.providers.google.common.hooks.base_google import GoogleBaseAsyncHoo
 if TYPE_CHECKING:
     from google.api_core.operation import Operation
     from google.api_core.retry import Retry
+    from google.api_core.retry_async import AsyncRetry
     from googleapiclient.discovery import Resource
 
 PATH_DATA_SCAN = "projects/{project_id}/locations/{region}/dataScans/{data_scan_id}"
@@ -896,7 +897,7 @@ class DataplexAsyncHook(GoogleBaseAsyncHook):
         region: str,
         data_scan_id: str | None = None,
         job_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Any:

airflow/providers/google/cloud/hooks/dataproc.py
@@ -51,6 +51,7 @@ if TYPE_CHECKING:
     from google.api_core.operation_async import AsyncOperation
     from google.api_core.operations_v1.operations_client import OperationsClient
     from google.api_core.retry import Retry
+    from google.api_core.retry_async import AsyncRetry
     from google.protobuf.duration_pb2 import Duration
     from google.protobuf.field_mask_pb2 import FieldMask
 
@@ -256,7 +257,7 @@ class DataprocHook(GoogleBaseHook):
         self,
         operation: Operation,
         timeout: float | None = None,
-        result_retry: Retry | _MethodDefault = DEFAULT,
+        result_retry: AsyncRetry | _MethodDefault = DEFAULT,
     ) -> Any:
         """Wait for a long-lasting operation to complete."""
         try:
@@ -997,7 +998,7 @@ class DataprocHook(GoogleBaseHook):
         region: str,
         project_id: str,
         wait_check_interval: int = 10,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Batch:
@@ -1132,7 +1133,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         virtual_cluster_config: dict | None = None,
         labels: dict[str, str] | None = None,
         request_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -1199,7 +1200,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         project_id: str,
         cluster_uuid: str | None = None,
         request_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -1242,7 +1243,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         region: str,
         cluster_name: str,
         project_id: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> str:
@@ -1277,7 +1278,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         region: str,
         cluster_name: str,
         project_id: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Cluster:
@@ -1309,7 +1310,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         filter_: str,
         project_id: str,
         page_size: int | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ):
@@ -1349,7 +1350,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         region: str,
         graceful_decommission_timeout: dict | Duration | None = None,
         request_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -1429,7 +1430,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         template: dict | WorkflowTemplate,
         project_id: str,
         region: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> WorkflowTemplate:
@@ -1465,7 +1466,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         version: int | None = None,
         request_id: str | None = None,
         parameters: dict[str, str] | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -1511,7 +1512,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         project_id: str,
         region: str,
         request_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -1554,7 +1555,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         job_id: str,
         project_id: str,
         region: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Job:
@@ -1588,7 +1589,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         project_id: str,
         region: str,
         request_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Job:
@@ -1624,7 +1625,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         job_id: str,
         project_id: str,
         region: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Job:
@@ -1658,7 +1659,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         batch: dict | Batch,
         batch_id: str | None = None,
         request_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -1703,7 +1704,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         batch_id: str,
         region: str,
         project_id: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> None:
@@ -1737,7 +1738,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         batch_id: str,
         region: str,
         project_id: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Batch:
@@ -1773,7 +1774,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         project_id: str,
         page_size: int | None = None,
         page_token: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
         filter: str | None = None,

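The recurring `Retry` → `AsyncRetry` change across the async hooks (BigQuery DTS, Cloud Build, Cloud Composer, Dataplex, Dataproc, Pub/Sub) is a typing fix: the async GAPIC clients expect a `google.api_core.retry_async.AsyncRetry` object. A small sketch of passing one explicitly to an async hook method; project, region, and cluster names are placeholders:

    import asyncio

    from google.api_core.retry_async import AsyncRetry

    from airflow.providers.google.cloud.hooks.dataproc import DataprocAsyncHook

    async def fetch_cluster():
        hook = DataprocAsyncHook(gcp_conn_id="google_cloud_default")
        cluster = await hook.get_cluster(
            region="europe-west1",       # placeholder region
            cluster_name="my-cluster",   # placeholder cluster
            project_id="my-project",     # placeholder project
            retry=AsyncRetry(initial=1.0, maximum=10.0, multiplier=2.0),
        )
        print(cluster.status.state)

    asyncio.run(fetch_cluster())
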
airflow/providers/google/cloud/hooks/gcs.py
@@ -822,10 +822,12 @@ class GCSHook(GoogleBaseHook):
                 versions=versions,
             )
 
+            blob_names = [blob.name for blob in blobs]
+
             if blobs.prefixes:
                 ids.extend(blobs.prefixes)
             else:
-                ids.extend(blob.name for blob in blobs)
+                ids.extend(blob_names)
 
             page_token = blobs.next_page_token
             if page_token is None:
@@ -933,14 +935,16 @@ class GCSHook(GoogleBaseHook):
                 versions=versions,
             )
 
+            blob_names = [
+                blob.name
+                for blob in blobs
+                if timespan_start <= blob.updated.replace(tzinfo=timezone.utc) < timespan_end
+            ]
+
             if blobs.prefixes:
                 ids.extend(blobs.prefixes)
             else:
-                ids.extend(
-                    blob.name
-                    for blob in blobs
-                    if timespan_start <= blob.updated.replace(tzinfo=timezone.utc) < timespan_end
-                )
+                ids.extend(blob_names)
 
             page_token = blobs.next_page_token
             if page_token is None:

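Both listing code paths in `GCSHook` now materialize the blob names before looking at `blobs.prefixes`: with the `google-cloud-storage` iterator, `prefixes` is only populated once the page has been consumed, so iterating first and branching afterwards avoids reading an empty set. A standalone sketch of the same pattern outside the hook; the bucket name is a placeholder:

    from google.cloud import storage

    client = storage.Client()
    blobs = client.list_blobs("my-example-bucket", prefix="data/", delimiter="/")  # placeholder bucket

    # Consume the iterator first: this collects the object names *and* fills in
    # blobs.prefixes with the "sub-directories" under the delimiter.
    blob_names = [blob.name for blob in blobs]

    if blobs.prefixes:
        print("prefixes:", sorted(blobs.prefixes))
    else:
        print("objects:", blob_names)
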
airflow/providers/google/cloud/hooks/pubsub.py
@@ -49,6 +49,7 @@ from airflow.version import version
 
 if TYPE_CHECKING:
     from google.api_core.retry import Retry
+    from google.api_core.retry_async import AsyncRetry
     from google.cloud.pubsub_v1.types import (
         DeadLetterPolicy,
         Duration,
@@ -611,7 +612,7 @@ class PubSubAsyncHook(GoogleBaseAsyncHook):
         project_id: str,
         ack_ids: list[str] | None = None,
         messages: list[ReceivedMessage] | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> None:
@@ -665,7 +666,7 @@ class PubSubAsyncHook(GoogleBaseAsyncHook):
         max_messages: int,
         project_id: str = PROVIDE_PROJECT_ID,
         return_immediately: bool = False,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> list[ReceivedMessage]:

airflow/providers/google/cloud/log/gcs_task_handler.py
@@ -26,7 +26,6 @@ from typing import TYPE_CHECKING, Collection
 
 # not sure why but mypy complains on missing `storage` but it is clearly there and is importable
 from google.cloud import storage  # type: ignore[attr-defined]
-from packaging.version import Version
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowNotFoundException
@@ -48,18 +47,6 @@ _DEFAULT_SCOPESS = frozenset(
 logger = logging.getLogger(__name__)
 
 
-def get_default_delete_local_copy():
-    """Load delete_local_logs conf if Airflow version > 2.6 and return False if not.
-
-    TODO: delete this function when min airflow version >= 2.6.
-    """
-    from airflow.version import version
-
-    if Version(version) < Version("2.6"):
-        return False
-    return conf.getboolean("logging", "delete_local_logs")
-
-
 class GCSTaskHandler(FileTaskHandler, LoggingMixin):
     """
     GCSTaskHandler is a python log handler that handles and reads task instance logs.
@@ -108,8 +95,8 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
         self.gcp_keyfile_dict = gcp_keyfile_dict
         self.scopes = gcp_scopes
         self.project_id = project_id
-        self.delete_local_copy = (
-            kwargs["delete_local_copy"] if "delete_local_copy" in kwargs else get_default_delete_local_copy()
+        self.delete_local_copy = kwargs.get(
+            "delete_local_copy", conf.getboolean("logging", "delete_local_logs")
         )
 
     @cached_property
@@ -218,30 +205,6 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
             messages.append(f"Unable to read remote log {e}")
         return messages, logs
 
-    def _read(self, ti, try_number, metadata=None):
-        """
-        Read logs of given task instance and try_number from GCS.
-
-        If failed, read the log from task instance host machine.
-
-        todo: when min airflow version >= 2.6, remove this method
-
-        :param ti: task instance object
-        :param try_number: task instance try_number to read logs from
-        :param metadata: log metadata,
-            can be used for steaming log reading and auto-tailing.
-        """
-        if hasattr(super(), "_read_remote_logs"):
-            # from Airflow 2.6, we don't implement the `_read` method.
-            # if parent has _read_remote_logs, we're >= 2.6
-            return super()._read(ti, try_number, metadata)
-
-        messages, logs = self._read_remote_logs(ti, try_number, metadata)
-        if not logs:
-            return super()._read(ti, try_number, metadata)
-
-        return "".join([f"*** {x}\n" for x in messages]) + "\n".join(logs), {"end_of_log": True}
-
     def gcs_write(self, log, remote_log_location) -> bool:
         """
         Write the log to the remote location and return `True`; fail silently and return `False` on error.

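With the provider's Airflow floor raised to 2.6, the version probe around `delete_local_logs` is gone: `delete_local_copy` now defaults directly to the `[logging] delete_local_logs` setting unless an explicit value is passed. A hedged sketch of the fallback; the paths are placeholders:

    from airflow.configuration import conf
    from airflow.providers.google.cloud.log.gcs_task_handler import GCSTaskHandler

    handler = GCSTaskHandler(
        base_log_folder="/tmp/airflow/logs",       # placeholder local folder
        gcs_log_folder="gs://my-log-bucket/logs",  # placeholder bucket
    )
    # No delete_local_copy kwarg supplied, so the handler mirrors the config value.
    assert handler.delete_local_copy == conf.getboolean("logging", "delete_local_logs")
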
airflow/providers/google/cloud/log/stackdriver_task_handler.py
@@ -30,21 +30,13 @@ from google.cloud.logging_v2.types import ListLogEntriesRequest, ListLogEntriesR
 
 from airflow.providers.google.cloud.utils.credentials_provider import get_credentials_and_project_id
 from airflow.providers.google.common.consts import CLIENT_INFO
+from airflow.utils.log.trigger_handler import ctx_indiv_trigger
 
 if TYPE_CHECKING:
-    from contextvars import ContextVar
-
     from google.auth.credentials import Credentials
 
     from airflow.models import TaskInstance
 
-try:
-    # todo: remove this conditional import when min airflow version >= 2.6
-    ctx_indiv_trigger: ContextVar | None
-    from airflow.utils.log.trigger_handler import ctx_indiv_trigger
-except ImportError:
-    ctx_indiv_trigger = None
-
 DEFAULT_LOGGER_NAME = "airflow"
 _GLOBAL_RESOURCE = Resource(type="global", labels={})
@@ -174,8 +166,7 @@ class StackdriverTaskHandler(logging.Handler):
         """
         message = self.format(record)
         ti = None
-        # todo: remove ctx_indiv_trigger is not None check when min airflow version >= 2.6
-        if ctx_indiv_trigger is not None and getattr(record, ctx_indiv_trigger.name, None):
+        if getattr(record, ctx_indiv_trigger.name, None):
             ti = getattr(record, "task_instance", None)  # trigger context
         labels = self._get_labels(ti)
         self._transport.send(record, message, resource=self.resource, labels=labels)