apache-airflow-providers-google 10.12.0__py3-none-any.whl → 10.13.0__py3-none-any.whl
This diff shows the changes between publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- airflow/providers/google/__init__.py +3 -3
- airflow/providers/google/cloud/fs/gcs.py +16 -13
- airflow/providers/google/cloud/hooks/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/hooks/cloud_build.py +2 -1
- airflow/providers/google/cloud/hooks/cloud_composer.py +4 -3
- airflow/providers/google/cloud/hooks/compute_ssh.py +18 -6
- airflow/providers/google/cloud/hooks/dataflow.py +61 -3
- airflow/providers/google/cloud/hooks/dataplex.py +2 -1
- airflow/providers/google/cloud/hooks/dataproc.py +19 -18
- airflow/providers/google/cloud/hooks/gcs.py +10 -6
- airflow/providers/google/cloud/hooks/pubsub.py +3 -2
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -39
- airflow/providers/google/cloud/log/stackdriver_task_handler.py +2 -11
- airflow/providers/google/cloud/operators/bigquery.py +47 -47
- airflow/providers/google/cloud/operators/cloud_composer.py +1 -1
- airflow/providers/google/cloud/operators/cloud_run.py +3 -3
- airflow/providers/google/cloud/operators/dataflow.py +6 -0
- airflow/providers/google/cloud/operators/dataplex.py +530 -1
- airflow/providers/google/cloud/operators/dataproc.py +11 -11
- airflow/providers/google/cloud/operators/gcs.py +90 -15
- airflow/providers/google/cloud/operators/kubernetes_engine.py +2 -3
- airflow/providers/google/cloud/operators/pubsub.py +47 -55
- airflow/providers/google/cloud/secrets/secret_manager.py +22 -1
- airflow/providers/google/cloud/sensors/cloud_composer.py +14 -1
- airflow/providers/google/cloud/sensors/dataplex.py +118 -0
- airflow/providers/google/cloud/sensors/gcs.py +10 -1
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +5 -5
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +42 -42
- airflow/providers/google/cloud/transfers/mssql_to_gcs.py +9 -9
- airflow/providers/google/cloud/triggers/cloud_run.py +7 -7
- airflow/providers/google/cloud/triggers/dataplex.py +82 -0
- airflow/providers/google/cloud/triggers/dataproc.py +2 -5
- airflow/providers/google/cloud/triggers/gcs.py +13 -3
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +3 -1
- airflow/providers/google/common/hooks/base_google.py +6 -4
- airflow/providers/google/get_provider_info.py +14 -13
- {apache_airflow_providers_google-10.12.0.dist-info → apache_airflow_providers_google-10.13.0.dist-info}/METADATA +30 -30
- {apache_airflow_providers_google-10.12.0.dist-info → apache_airflow_providers_google-10.13.0.dist-info}/RECORD +40 -40
- {apache_airflow_providers_google-10.12.0.dist-info → apache_airflow_providers_google-10.13.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.12.0.dist-info → apache_airflow_providers_google-10.13.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/__init__.py

```diff
@@ -27,7 +27,7 @@ import packaging.version
 
 __all__ = ["__version__"]
 
-__version__ = "10.12.0"
+__version__ = "10.13.0"
 
 try:
     from airflow import __version__ as airflow_version
@@ -35,8 +35,8 @@ except ImportError:
     from airflow.version import version as airflow_version
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
-    "2.5.0"
+    "2.6.0"
 ):
     raise RuntimeError(
-        f"The package `apache-airflow-providers-google:{__version__}` needs Apache Airflow 2.5.0+"
+        f"The package `apache-airflow-providers-google:{__version__}` needs Apache Airflow 2.6.0+"
     )
```
airflow/providers/google/cloud/fs/gcs.py

```diff
@@ -39,7 +39,7 @@ GCS_VERSION_AWARE = "gcs.version-aware"
 schemes = ["gs", "gcs"]
 
 
-def get_fs(conn_id: str | None) -> AbstractFileSystem:
+def get_fs(conn_id: str | None, storage_options: dict[str, str] | None = None) -> AbstractFileSystem:
     # https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem
     from gcsfs import GCSFileSystem
 
@@ -49,15 +49,18 @@ def get_fs(conn_id: str | None) -> AbstractFileSystem:
     g = GoogleBaseHook(gcp_conn_id=conn_id)
     creds = g.get_credentials()
 
-    return GCSFileSystem(
-        project=g.project_id,
-        access=g.extras.get(GCS_ACCESS, "full_control"),
-        token=creds.token,
-        consistency=g.extras.get(GCS_CONSISTENCY, "none"),
-        cache_timeout=g.extras.get(GCS_CACHE_TIMEOUT),
-        requester_pays=g.extras.get(GCS_REQUESTER_PAYS, False),
-        session_kwargs=g.extras.get(GCS_SESSION_KWARGS, {}),
-        endpoint_url=g.extras.get(GCS_ENDPOINT),
-        default_location=g.extras.get(GCS_DEFAULT_LOCATION),
-        version_aware=g.extras.get(GCS_VERSION_AWARE, "false").lower() == "true",
-    )
+    options = {
+        "project": g.project_id,
+        "access": g.extras.get(GCS_ACCESS, "full_control"),
+        "token": creds.token,
+        "consistency": g.extras.get(GCS_CONSISTENCY, "none"),
+        "cache_timeout": g.extras.get(GCS_CACHE_TIMEOUT),
+        "requester_pays": g.extras.get(GCS_REQUESTER_PAYS, False),
+        "session_kwargs": g.extras.get(GCS_SESSION_KWARGS, {}),
+        "endpoint_url": g.extras.get(GCS_ENDPOINT),
+        "default_location": g.extras.get(GCS_DEFAULT_LOCATION),
+        "version_aware": g.extras.get(GCS_VERSION_AWARE, "false").lower() == "true",
+    }
+    options.update(storage_options or {})
+
+    return GCSFileSystem(**options)
```
airflow/providers/google/cloud/hooks/bigquery_dts.py

```diff
@@ -38,6 +38,7 @@ from airflow.providers.google.common.hooks.base_google import (
 
 if TYPE_CHECKING:
     from google.api_core.retry import Retry
+    from google.api_core.retry_async import AsyncRetry
     from googleapiclient.discovery import Resource
 
 
@@ -321,7 +322,7 @@ class AsyncBiqQueryDataTransferServiceHook(GoogleBaseAsyncHook):
         run_id: str,
         project_id: str | None,
         location: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ):
```
airflow/providers/google/cloud/hooks/cloud_build.py

```diff
@@ -33,6 +33,7 @@ from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 if TYPE_CHECKING:
     from google.api_core.operation import Operation
     from google.api_core.retry import Retry
+    from google.api_core.retry_async import AsyncRetry
     from google.cloud.devtools.cloudbuild_v1.types import Build, BuildTrigger, RepoSource
 
 # Time to sleep between active checks of the operation results
@@ -645,7 +646,7 @@ class CloudBuildAsyncHook(GoogleBaseHook):
         self,
         id_: str,
         project_id: str = PROVIDE_PROJECT_ID,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
         location: str = "global",
```
airflow/providers/google/cloud/hooks/cloud_composer.py

```diff
@@ -35,6 +35,7 @@ if TYPE_CHECKING:
     from google.api_core.operation import Operation
     from google.api_core.operation_async import AsyncOperation
     from google.api_core.retry import Retry
+    from google.api_core.retry_async import AsyncRetry
     from google.cloud.orchestration.airflow.service_v1.services.environments.pagers import (
         ListEnvironmentsPager,
     )
@@ -332,7 +333,7 @@ class CloudComposerAsyncHook(GoogleBaseHook):
         project_id: str,
         region: str,
         environment: Environment | dict,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -361,7 +362,7 @@ class CloudComposerAsyncHook(GoogleBaseHook):
         project_id: str,
         region: str,
         environment_id: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -389,7 +390,7 @@ class CloudComposerAsyncHook(GoogleBaseHook):
         environment_id: str,
         environment: Environment | dict,
         update_mask: dict | FieldMask,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
```
airflow/providers/google/cloud/hooks/compute_ssh.py

```diff
@@ -86,6 +86,9 @@ class ComputeEngineSSHHook(SSHHook):
     :param gcp_conn_id: The connection id to use when fetching connection information
     :param max_retries: Maximum number of retries the process will try to establish connection to instance.
         Could be decreased/increased by user based on the amount of parallel SSH connections to the instance.
+    :param impersonation_chain: Optional. The service account email to impersonate using short-term
+        credentials. The provided service account must grant the originating account
+        the Service Account Token Creator IAM role and have the sufficient rights to perform the request
     """
 
     conn_name_attr = "gcp_conn_id"
@@ -93,8 +96,8 @@ class ComputeEngineSSHHook(SSHHook):
     conn_type = "gcpssh"
     hook_name = "Google Cloud SSH"
 
-    @staticmethod
-    def get_ui_field_behaviour() -> dict[str, Any]:
+    @classmethod
+    def get_ui_field_behaviour(cls) -> dict[str, Any]:
         return {
             "hidden_fields": ["host", "schema", "login", "password", "port", "extra"],
             "relabeling": {},
@@ -114,15 +117,17 @@ class ComputeEngineSSHHook(SSHHook):
         expire_time: int = 300,
         cmd_timeout: int | ArgNotSet = NOTSET,
         max_retries: int = 10,
+        impersonation_chain: str | None = None,
         **kwargs,
     ) -> None:
         if kwargs.get("delegate_to") is not None:
             raise RuntimeError(
                 "The `delegate_to` parameter has been deprecated before and finally removed in this version"
-                " of Google Provider. You MUST convert it to `impersonate_chain`"
+                " of Google Provider. You MUST convert it to `impersonation_chain`"
             )
         # Ignore original constructor
         # super().__init__()
+        self.gcp_conn_id = gcp_conn_id
         self.instance_name = instance_name
         self.zone = zone
         self.user = user
@@ -132,9 +137,9 @@ class ComputeEngineSSHHook(SSHHook):
         self.use_iap_tunnel = use_iap_tunnel
         self.use_oslogin = use_oslogin
         self.expire_time = expire_time
-        self.gcp_conn_id = gcp_conn_id
         self.cmd_timeout = cmd_timeout
         self.max_retries = max_retries
+        self.impersonation_chain = impersonation_chain
         self._conn: Any | None = None
 
     @cached_property
@@ -143,7 +148,12 @@ class ComputeEngineSSHHook(SSHHook):
 
     @cached_property
     def _compute_hook(self) -> ComputeEngineHook:
-        return ComputeEngineHook(gcp_conn_id=self.gcp_conn_id)
+        if self.impersonation_chain:
+            return ComputeEngineHook(
+                gcp_conn_id=self.gcp_conn_id, impersonation_chain=self.impersonation_chain
+            )
+        else:
+            return ComputeEngineHook(gcp_conn_id=self.gcp_conn_id)
 
     def _load_connection_config(self):
         def _boolify(value):
@@ -254,6 +264,8 @@ class ComputeEngineSSHHook(SSHHook):
                 f"--zone={self.zone}",
                 "--verbosity=warning",
             ]
+            if self.impersonation_chain:
+                proxy_command_args.append(f"--impersonate-service-account={self.impersonation_chain}")
            proxy_command = " ".join(shlex.quote(arg) for arg in proxy_command_args)
            sshclient = self._connect_to_instance(user, hostname, privkey, proxy_command)
            break
@@ -283,7 +295,7 @@ class ComputeEngineSSHHook(SSHHook):
         client = _GCloudAuthorizedSSHClient(self._compute_hook)
         # Default is RejectPolicy
         # No known host checking since we are not storing privatekey
-        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # nosec B507
         client.connect(
             hostname=hostname,
             username=user,
```
airflow/providers/google/cloud/hooks/dataflow.py

```diff
@@ -27,9 +27,10 @@ import time
 import uuid
 import warnings
 from copy import deepcopy
-from typing import Any, Callable, Generator, Sequence, TypeVar, cast
+from typing import TYPE_CHECKING, Any, Callable, Generator, Sequence, TypeVar, cast
 
 from google.cloud.dataflow_v1beta3 import GetJobRequest, Job, JobState, JobsV1Beta3AsyncClient, JobView
+from google.cloud.dataflow_v1beta3.types.jobs import ListJobsRequest
 from googleapiclient.discovery import build
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -42,6 +43,10 @@ from airflow.providers.google.common.hooks.base_google import (
 from airflow.utils.log.logging_mixin import LoggingMixin
 from airflow.utils.timeout import timeout
 
+if TYPE_CHECKING:
+    from google.cloud.dataflow_v1beta3.services.jobs_v1_beta3.pagers import ListJobsAsyncPager
+
+
 # This is the default location
 # https://cloud.google.com/dataflow/pipelines/specifying-exec-params
 DEFAULT_DATAFLOW_LOCATION = "us-central1"
@@ -55,7 +60,7 @@ T = TypeVar("T", bound=Callable)
 
 
 def process_line_and_extract_dataflow_job_id_callback(
-    on_new_job_id_callback: Callable[[str], None] | None
+    on_new_job_id_callback: Callable[[str], None] | None,
 ) -> Callable[[str], None]:
     """Build callback that triggers the specified function.
 
@@ -219,7 +224,7 @@ class _DataflowJobsController(LoggingMixin):
 
     def is_job_running(self) -> bool:
         """
-        Helper method to check if jos is still running in dataflow.
+        Helper method to check if job is still running in dataflow.
 
         :return: True if job is running.
         """
@@ -1203,6 +1208,24 @@ class DataflowHook(GoogleBaseHook):
         )
         job_controller.wait_for_done()
 
+    @GoogleBaseHook.fallback_to_default_project_id
+    def is_job_done(self, location: str, project_id: str, job_id: str) -> bool:
+        """
+        Check that Dataflow job is started(for streaming job) or finished(for batch job).
+
+        :param location: location the job is running
+        :param project_id: Google Cloud project ID in which to start a job
+        :param job_id: Dataflow job ID
+        """
+        job_controller = _DataflowJobsController(
+            dataflow=self.get_conn(),
+            project_number=project_id,
+            location=location,
+        )
+        job = job_controller.fetch_job_by_id(job_id)
+
+        return job_controller._check_dataflow_job_state(job)
+
 
 class AsyncDataflowHook(GoogleBaseAsyncHook):
     """Async hook class for dataflow service."""
@@ -1295,3 +1318,38 @@ class AsyncDataflowHook(GoogleBaseAsyncHook):
         )
         state = job.current_state
         return state
+
+    async def list_jobs(
+        self,
+        jobs_filter: int | None = None,
+        project_id: str | None = PROVIDE_PROJECT_ID,
+        location: str | None = DEFAULT_DATAFLOW_LOCATION,
+        page_size: int | None = None,
+        page_token: str | None = None,
+    ) -> ListJobsAsyncPager:
+        """List jobs.
+
+        For detail see:
+        https://cloud.google.com/python/docs/reference/dataflow/latest/google.cloud.dataflow_v1beta3.types.ListJobsRequest
+
+        :param jobs_filter: Optional. This field filters out and returns jobs in the specified job state.
+        :param project_id: Optional. The Google Cloud project ID in which to start a job.
+            If set to None or missing, the default project_id from the Google Cloud connection is used.
+        :param location: Optional. The location of the Dataflow job (for example europe-west1).
+        :param page_size: Optional. If there are many jobs, limit response to at most this many.
+        :param page_token: Optional. Set this to the 'next_page_token' field of a previous response to request
+            additional results in a long list.
+        """
+        project_id = project_id or (await self.get_project_id())
+        client = await self.initialize_client(JobsV1Beta3AsyncClient)
+        request: ListJobsRequest = ListJobsRequest(
+            {
+                "project_id": project_id,
+                "location": location,
+                "filter": jobs_filter,
+                "page_size": page_size,
+                "page_token": page_token,
+            }
+        )
+        page_result: ListJobsAsyncPager = await client.list_jobs(request=request)
+        return page_result
```
airflow/providers/google/cloud/hooks/dataplex.py

```diff
@@ -40,6 +40,7 @@ from airflow.providers.google.common.hooks.base_google import GoogleBaseAsyncHook
 if TYPE_CHECKING:
     from google.api_core.operation import Operation
     from google.api_core.retry import Retry
+    from google.api_core.retry_async import AsyncRetry
     from googleapiclient.discovery import Resource
 
 PATH_DATA_SCAN = "projects/{project_id}/locations/{region}/dataScans/{data_scan_id}"
@@ -896,7 +897,7 @@ class DataplexAsyncHook(GoogleBaseAsyncHook):
         region: str,
         data_scan_id: str | None = None,
         job_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Any:
```
airflow/providers/google/cloud/hooks/dataproc.py

```diff
@@ -51,6 +51,7 @@ if TYPE_CHECKING:
     from google.api_core.operation_async import AsyncOperation
     from google.api_core.operations_v1.operations_client import OperationsClient
     from google.api_core.retry import Retry
+    from google.api_core.retry_async import AsyncRetry
     from google.protobuf.duration_pb2 import Duration
     from google.protobuf.field_mask_pb2 import FieldMask
 
@@ -256,7 +257,7 @@ class DataprocHook(GoogleBaseHook):
         self,
         operation: Operation,
         timeout: float | None = None,
-        result_retry: Retry | _MethodDefault = DEFAULT,
+        result_retry: AsyncRetry | _MethodDefault = DEFAULT,
     ) -> Any:
         """Wait for a long-lasting operation to complete."""
         try:
@@ -997,7 +998,7 @@ class DataprocHook(GoogleBaseHook):
         region: str,
         project_id: str,
         wait_check_interval: int = 10,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Batch:
@@ -1132,7 +1133,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         virtual_cluster_config: dict | None = None,
         labels: dict[str, str] | None = None,
         request_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -1199,7 +1200,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         project_id: str,
         cluster_uuid: str | None = None,
         request_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -1242,7 +1243,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         region: str,
         cluster_name: str,
         project_id: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> str:
@@ -1277,7 +1278,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         region: str,
         cluster_name: str,
         project_id: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Cluster:
@@ -1309,7 +1310,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         filter_: str,
         project_id: str,
         page_size: int | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ):
@@ -1349,7 +1350,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         region: str,
         graceful_decommission_timeout: dict | Duration | None = None,
         request_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -1429,7 +1430,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         template: dict | WorkflowTemplate,
         project_id: str,
         region: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> WorkflowTemplate:
@@ -1465,7 +1466,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         version: int | None = None,
         request_id: str | None = None,
         parameters: dict[str, str] | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -1511,7 +1512,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         project_id: str,
         region: str,
         request_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -1554,7 +1555,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         job_id: str,
         project_id: str,
         region: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Job:
@@ -1588,7 +1589,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         project_id: str,
         region: str,
         request_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Job:
@@ -1624,7 +1625,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         job_id: str,
         project_id: str,
         region: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Job:
@@ -1658,7 +1659,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         batch: dict | Batch,
         batch_id: str | None = None,
         request_id: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> AsyncOperation:
@@ -1703,7 +1704,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         batch_id: str,
         region: str,
         project_id: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> None:
@@ -1737,7 +1738,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         batch_id: str,
         region: str,
         project_id: str,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> Batch:
@@ -1773,7 +1774,7 @@ class DataprocAsyncHook(GoogleBaseHook):
         project_id: str,
         page_size: int | None = None,
         page_token: str | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
         filter: str | None = None,
```
airflow/providers/google/cloud/hooks/gcs.py

```diff
@@ -822,10 +822,12 @@ class GCSHook(GoogleBaseHook):
                 versions=versions,
             )
 
+            blob_names = [blob.name for blob in blobs]
+
             if blobs.prefixes:
                 ids.extend(blobs.prefixes)
             else:
-                ids.extend(blob.name for blob in blobs)
+                ids.extend(blob_names)
 
             page_token = blobs.next_page_token
             if page_token is None:
@@ -933,14 +935,16 @@ class GCSHook(GoogleBaseHook):
                 versions=versions,
             )
 
+            blob_names = [
+                blob.name
+                for blob in blobs
+                if timespan_start <= blob.updated.replace(tzinfo=timezone.utc) < timespan_end
+            ]
+
             if blobs.prefixes:
                 ids.extend(blobs.prefixes)
             else:
-                ids.extend(
-                    blob.name
-                    for blob in blobs
-                    if timespan_start <= blob.updated.replace(tzinfo=timezone.utc) < timespan_end
-                )
+                ids.extend(blob_names)
 
             page_token = blobs.next_page_token
             if page_token is None:
```
airflow/providers/google/cloud/hooks/pubsub.py

```diff
@@ -49,6 +49,7 @@ from airflow.version import version
 
 if TYPE_CHECKING:
     from google.api_core.retry import Retry
+    from google.api_core.retry_async import AsyncRetry
     from google.cloud.pubsub_v1.types import (
         DeadLetterPolicy,
         Duration,
@@ -611,7 +612,7 @@ class PubSubAsyncHook(GoogleBaseAsyncHook):
         project_id: str,
         ack_ids: list[str] | None = None,
         messages: list[ReceivedMessage] | None = None,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> None:
@@ -665,7 +666,7 @@ class PubSubAsyncHook(GoogleBaseAsyncHook):
         max_messages: int,
         project_id: str = PROVIDE_PROJECT_ID,
         return_immediately: bool = False,
-        retry: Retry | _MethodDefault = DEFAULT,
+        retry: AsyncRetry | _MethodDefault = DEFAULT,
         timeout: float | None = None,
         metadata: Sequence[tuple[str, str]] = (),
     ) -> list[ReceivedMessage]:
```
airflow/providers/google/cloud/log/gcs_task_handler.py

```diff
@@ -26,7 +26,6 @@ from typing import TYPE_CHECKING, Collection
 
 # not sure why but mypy complains on missing `storage` but it is clearly there and is importable
 from google.cloud import storage  # type: ignore[attr-defined]
-from packaging.version import Version
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowNotFoundException
@@ -48,18 +47,6 @@ _DEFAULT_SCOPESS = frozenset(
 logger = logging.getLogger(__name__)
 
 
-def get_default_delete_local_copy():
-    """Load delete_local_logs conf if Airflow version > 2.6 and return False if not.
-
-    TODO: delete this function when min airflow version >= 2.6.
-    """
-    from airflow.version import version
-
-    if Version(version) < Version("2.6"):
-        return False
-    return conf.getboolean("logging", "delete_local_logs")
-
-
 class GCSTaskHandler(FileTaskHandler, LoggingMixin):
     """
     GCSTaskHandler is a python log handler that handles and reads task instance logs.
@@ -108,8 +95,8 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
         self.gcp_keyfile_dict = gcp_keyfile_dict
         self.scopes = gcp_scopes
         self.project_id = project_id
-        self.delete_local_copy = (
-            kwargs["delete_local_copy"] if "delete_local_copy" in kwargs else get_default_delete_local_copy()
+        self.delete_local_copy = kwargs.get(
+            "delete_local_copy", conf.getboolean("logging", "delete_local_logs")
         )
 
     @cached_property
@@ -218,30 +205,6 @@ class GCSTaskHandler(FileTaskHandler, LoggingMixin):
             messages.append(f"Unable to read remote log {e}")
         return messages, logs
 
-    def _read(self, ti, try_number, metadata=None):
-        """
-        Read logs of given task instance and try_number from GCS.
-
-        If failed, read the log from task instance host machine.
-
-        todo: when min airflow version >= 2.6, remove this method
-
-        :param ti: task instance object
-        :param try_number: task instance try_number to read logs from
-        :param metadata: log metadata,
-            can be used for steaming log reading and auto-tailing.
-        """
-        if hasattr(super(), "_read_remote_logs"):
-            # from Airflow 2.6, we don't implement the `_read` method.
-            # if parent has _read_remote_logs, we're >= 2.6
-            return super()._read(ti, try_number, metadata)
-
-        messages, logs = self._read_remote_logs(ti, try_number, metadata)
-        if not logs:
-            return super()._read(ti, try_number, metadata)
-
-        return "".join([f"*** {x}\n" for x in messages]) + "\n".join(logs), {"end_of_log": True}
-
     def gcs_write(self, log, remote_log_location) -> bool:
         """
         Write the log to the remote location and return `True`; fail silently and return `False` on error.
```
airflow/providers/google/cloud/log/stackdriver_task_handler.py

```diff
@@ -30,21 +30,13 @@ from google.cloud.logging_v2.types import ListLogEntriesRequest, ListLogEntriesResponse
 
 from airflow.providers.google.cloud.utils.credentials_provider import get_credentials_and_project_id
 from airflow.providers.google.common.consts import CLIENT_INFO
+from airflow.utils.log.trigger_handler import ctx_indiv_trigger
 
 if TYPE_CHECKING:
-    from contextvars import ContextVar
-
     from google.auth.credentials import Credentials
 
     from airflow.models import TaskInstance
 
-try:
-    # todo: remove this conditional import when min airflow version >= 2.6
-    ctx_indiv_trigger: ContextVar | None
-    from airflow.utils.log.trigger_handler import ctx_indiv_trigger
-except ImportError:
-    ctx_indiv_trigger = None
-
 DEFAULT_LOGGER_NAME = "airflow"
 _GLOBAL_RESOURCE = Resource(type="global", labels={})
 
@@ -174,8 +166,7 @@ class StackdriverTaskHandler(logging.Handler):
         """
         message = self.format(record)
         ti = None
-
-        if ctx_indiv_trigger is not None and getattr(record, ctx_indiv_trigger.name, None):
+        if getattr(record, ctx_indiv_trigger.name, None):
             ti = getattr(record, "task_instance", None)  # trigger context
         labels = self._get_labels(ti)
         self._transport.send(record, message, resource=self.resource, labels=labels)
```