apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/google/__init__.py +5 -8
- airflow/providers/google/cloud/hooks/automl.py +35 -1
- airflow/providers/google/cloud/hooks/bigquery.py +126 -41
- airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
- airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
- airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
- airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
- airflow/providers/google/cloud/hooks/dataflow.py +246 -32
- airflow/providers/google/cloud/hooks/dataplex.py +6 -2
- airflow/providers/google/cloud/hooks/dlp.py +14 -14
- airflow/providers/google/cloud/hooks/gcs.py +6 -2
- airflow/providers/google/cloud/hooks/gdm.py +2 -2
- airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
- airflow/providers/google/cloud/hooks/mlengine.py +8 -4
- airflow/providers/google/cloud/hooks/pubsub.py +1 -1
- airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
- airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
- airflow/providers/google/cloud/hooks/vertex_ai/prediction_service.py +91 -0
- airflow/providers/google/cloud/links/vertex_ai.py +2 -1
- airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
- airflow/providers/google/cloud/operators/automl.py +243 -37
- airflow/providers/google/cloud/operators/bigquery.py +164 -62
- airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
- airflow/providers/google/cloud/operators/bigtable.py +7 -6
- airflow/providers/google/cloud/operators/cloud_build.py +12 -11
- airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
- airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
- airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
- airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
- airflow/providers/google/cloud/operators/compute.py +12 -11
- airflow/providers/google/cloud/operators/datacatalog.py +21 -20
- airflow/providers/google/cloud/operators/dataflow.py +59 -42
- airflow/providers/google/cloud/operators/datafusion.py +11 -10
- airflow/providers/google/cloud/operators/datapipeline.py +3 -2
- airflow/providers/google/cloud/operators/dataprep.py +5 -4
- airflow/providers/google/cloud/operators/dataproc.py +20 -17
- airflow/providers/google/cloud/operators/datastore.py +8 -7
- airflow/providers/google/cloud/operators/dlp.py +31 -30
- airflow/providers/google/cloud/operators/functions.py +4 -3
- airflow/providers/google/cloud/operators/gcs.py +66 -41
- airflow/providers/google/cloud/operators/kubernetes_engine.py +256 -49
- airflow/providers/google/cloud/operators/life_sciences.py +2 -1
- airflow/providers/google/cloud/operators/mlengine.py +11 -10
- airflow/providers/google/cloud/operators/pubsub.py +6 -5
- airflow/providers/google/cloud/operators/spanner.py +7 -6
- airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
- airflow/providers/google/cloud/operators/stackdriver.py +11 -10
- airflow/providers/google/cloud/operators/tasks.py +14 -13
- airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
- airflow/providers/google/cloud/operators/translate_speech.py +2 -1
- airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
- airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
- airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
- airflow/providers/google/cloud/operators/vision.py +13 -12
- airflow/providers/google/cloud/operators/workflows.py +12 -14
- airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
- airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
- airflow/providers/google/cloud/sensors/bigtable.py +2 -1
- airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/sensors/dataflow.py +239 -52
- airflow/providers/google/cloud/sensors/datafusion.py +2 -1
- airflow/providers/google/cloud/sensors/dataproc.py +3 -2
- airflow/providers/google/cloud/sensors/gcs.py +14 -12
- airflow/providers/google/cloud/sensors/tasks.py +2 -1
- airflow/providers/google/cloud/sensors/workflows.py +2 -1
- airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
- airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
- airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
- airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
- airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
- airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
- airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
- airflow/providers/google/cloud/triggers/bigquery.py +75 -6
- airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
- airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
- airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
- airflow/providers/google/cloud/triggers/dataflow.py +504 -4
- airflow/providers/google/cloud/triggers/dataproc.py +190 -27
- airflow/providers/google/cloud/triggers/kubernetes_engine.py +2 -3
- airflow/providers/google/cloud/triggers/mlengine.py +2 -1
- airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
- airflow/providers/google/common/hooks/base_google.py +45 -7
- airflow/providers/google/firebase/hooks/firestore.py +2 -2
- airflow/providers/google/firebase/operators/firestore.py +2 -1
- airflow/providers/google/get_provider_info.py +5 -3
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/METADATA +18 -18
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/RECORD +90 -90
- airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/sensors/dataflow.py

@@ -19,14 +19,23 @@
 
 from __future__ import annotations
 
-from
+from functools import cached_property
+from typing import TYPE_CHECKING, Any, Callable, Sequence
 
+from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowSkipException
 from airflow.providers.google.cloud.hooks.dataflow import (
     DEFAULT_DATAFLOW_LOCATION,
     DataflowHook,
     DataflowJobStatus,
 )
+from airflow.providers.google.cloud.triggers.dataflow import (
+    DataflowJobAutoScalingEventTrigger,
+    DataflowJobMessagesTrigger,
+    DataflowJobMetricsTrigger,
+    DataflowJobStatusTrigger,
+)
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
@@ -42,7 +51,7 @@ class DataflowJobStatusSensor(BaseSensorOperator):
         :ref:`howto/operator:DataflowJobStatusSensor`
 
     :param job_id: ID of the job to be checked.
-    :param expected_statuses: The expected state of the operation.
+    :param expected_statuses: The expected state(s) of the operation.
         See:
         https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState
     :param project_id: Optional, the Google Cloud project ID in which to start a job.
@@ -58,6 +67,8 @@ class DataflowJobStatusSensor(BaseSensorOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
+    :param deferrable: If True, run the sensor in the deferrable mode.
+    :param poll_interval: Time (seconds) to wait between two consecutive calls to check the job.
     """
 
     template_fields: Sequence[str] = ("job_id",)
@@ -67,10 +78,12 @@ class DataflowJobStatusSensor(BaseSensorOperator):
         *,
         job_id: str,
         expected_statuses: set[str] | str,
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        poll_interval: int = 10,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -82,7 +95,8 @@ class DataflowJobStatusSensor(BaseSensorOperator):
         self.location = location
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
-        self.
+        self.deferrable = deferrable
+        self.poll_interval = poll_interval
 
     def poke(self, context: Context) -> bool:
         self.log.info(
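The `deferrable` argument stored above defaults to `conf.getboolean("operators", "default_deferrable", fallback=False)`, so it follows the `default_deferrable` option in the `[operators]` section of the Airflow configuration. As an illustration (the values shown are examples, not shipped defaults), deferral can therefore be switched on fleet-wide without editing DAG code:

    # airflow.cfg
    [operators]
    default_deferrable = true

    # equivalent environment variable
    AIRFLOW__OPERATORS__DEFAULT_DEFERRABLE=true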
@@ -90,10 +104,6 @@ class DataflowJobStatusSensor(BaseSensorOperator):
             self.job_id,
             ", ".join(self.expected_statuses),
         )
-        self.hook = DataflowHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
-        )
 
         job = self.hook.get_job(
             job_id=self.job_id,
@@ -115,10 +125,51 @@
 
         return False
 
+    def execute(self, context: Context) -> None:
+        """Airflow runs this method on the worker and defers using the trigger."""
+        if not self.deferrable:
+            super().execute(context)
+        elif not self.poke(context=context):
+            self.defer(
+                timeout=self.execution_timeout,
+                trigger=DataflowJobStatusTrigger(
+                    job_id=self.job_id,
+                    expected_statuses=self.expected_statuses,
+                    project_id=self.project_id,
+                    location=self.location,
+                    gcp_conn_id=self.gcp_conn_id,
+                    poll_sleep=self.poll_interval,
+                    impersonation_chain=self.impersonation_chain,
+                ),
+                method_name="execute_complete",
+            )
+
+    def execute_complete(self, context: Context, event: dict[str, str | list]) -> bool:
+        """
+        Execute this method when the task resumes its execution on the worker after deferral.
+
+        Returns True if the trigger returns an event with the success status, otherwise raises
+        an exception.
+        """
+        if event["status"] == "success":
+            self.log.info(event["message"])
+            return True
+        # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+        if self.soft_fail:
+            raise AirflowSkipException(f"Sensor failed with the following message: {event['message']}.")
+        raise AirflowException(f"Sensor failed with the following message: {event['message']}")
+
+    @cached_property
+    def hook(self) -> DataflowHook:
+        return DataflowHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
 
 
 class DataflowJobMetricsSensor(BaseSensorOperator):
     """
-    Checks
+    Checks for metrics associated with a single job in Google Cloud Dataflow.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -143,6 +194,9 @@ class DataflowJobMetricsSensor(BaseSensorOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
+    :param deferrable: If True, run the sensor in the deferrable mode.
+    :param poll_interval: Time (seconds) to wait between two consecutive calls to check the job.
+
     """
 
     template_fields: Sequence[str] = ("job_id",)
@@ -151,12 +205,14 @@ class DataflowJobMetricsSensor(BaseSensorOperator):
         self,
         *,
         job_id: str,
-        callback: Callable
+        callback: Callable | None = None,
         fail_on_terminal_state: bool = True,
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        poll_interval: int = 10,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -167,14 +223,10 @@ class DataflowJobMetricsSensor(BaseSensorOperator):
         self.location = location
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
-        self.
+        self.deferrable = deferrable
+        self.poll_interval = poll_interval
 
     def poke(self, context: Context) -> bool:
-        self.hook = DataflowHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
-        )
-
         if self.fail_on_terminal_state:
             job = self.hook.get_job(
                 job_id=self.job_id,
@@ -194,27 +246,73 @@ class DataflowJobMetricsSensor(BaseSensorOperator):
             project_id=self.project_id,
             location=self.location,
         )
+        return result["metrics"] if self.callback is None else self.callback(result["metrics"])
+
+    def execute(self, context: Context) -> Any:
+        """Airflow runs this method on the worker and defers using the trigger."""
+        if not self.deferrable:
+            super().execute(context)
+        else:
+            self.defer(
+                timeout=self.execution_timeout,
+                trigger=DataflowJobMetricsTrigger(
+                    job_id=self.job_id,
+                    project_id=self.project_id,
+                    location=self.location,
+                    gcp_conn_id=self.gcp_conn_id,
+                    poll_sleep=self.poll_interval,
+                    impersonation_chain=self.impersonation_chain,
+                    fail_on_terminal_state=self.fail_on_terminal_state,
+                ),
+                method_name="execute_complete",
+            )
 
-
+    def execute_complete(self, context: Context, event: dict[str, str | list]) -> Any:
+        """
+        Execute this method when the task resumes its execution on the worker after deferral.
+
+        If the trigger returns an event with success status - passes the event result to the callback function.
+        Returns the event result if no callback function is provided.
+
+        If the trigger returns an event with error status - raises an exception.
+        """
+        if event["status"] == "success":
+            self.log.info(event["message"])
+            return event["result"] if self.callback is None else self.callback(event["result"])
+        # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+        if self.soft_fail:
+            raise AirflowSkipException(f"Sensor failed with the following message: {event['message']}.")
+        raise AirflowException(f"Sensor failed with the following message: {event['message']}")
+
+    @cached_property
+    def hook(self) -> DataflowHook:
+        return DataflowHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
 
 
 class DataflowJobMessagesSensor(BaseSensorOperator):
     """
-    Checks for
+    Checks for job messages associated with a single job in Google Cloud Dataflow.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
         :ref:`howto/operator:DataflowJobMessagesSensor`
 
-    :param job_id: ID of the job to be checked.
-    :param callback:
-
-
-
-
+    :param job_id: ID of the Dataflow job to be checked.
+    :param callback: a function that can accept a list of serialized job messages.
+        It can do whatever you want it to do. If the callback function is not provided,
+        then on successful completion the task will exit with True value.
+        For more info about the job message content see:
+        https://cloud.google.com/python/docs/reference/dataflow/latest/google.cloud.dataflow_v1beta3.types.JobMessage
+    :param fail_on_terminal_state: If set to True the sensor will raise an exception when the job reaches a terminal state.
+        No job messages will be returned.
     :param project_id: Optional, the Google Cloud project ID in which to start a job.
         If set to None or missing, the default project_id from the Google Cloud connection is used.
-    :param location:
+    :param location: The location of the Dataflow job (for example europe-west1).
+        If set to None then the value of DEFAULT_DATAFLOW_LOCATION will be used.
+        See: https://cloud.google.com/dataflow/docs/concepts/regional-endpoints
     :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
     :param impersonation_chain: Optional service account to impersonate using short-term
         credentials, or chained list of accounts required to get the access_token
@@ -224,6 +322,8 @@ class DataflowJobMessagesSensor(BaseSensorOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
+    :param deferrable: If True, run the sensor in the deferrable mode.
+    :param poll_interval: Time (seconds) to wait between two consecutive calls to check the job.
     """
 
     template_fields: Sequence[str] = ("job_id",)
@@ -232,12 +332,14 @@ class DataflowJobMessagesSensor(BaseSensorOperator):
         self,
         *,
         job_id: str,
-        callback: Callable,
+        callback: Callable | None = None,
         fail_on_terminal_state: bool = True,
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        poll_interval: int = 10,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -248,14 +350,10 @@ class DataflowJobMessagesSensor(BaseSensorOperator):
         self.location = location
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
-        self.
+        self.deferrable = deferrable
+        self.poll_interval = poll_interval
 
     def poke(self, context: Context) -> bool:
-        self.hook = DataflowHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
-        )
-
         if self.fail_on_terminal_state:
             job = self.hook.get_job(
                 job_id=self.job_id,
@@ -276,26 +374,73 @@ class DataflowJobMessagesSensor(BaseSensorOperator):
                 location=self.location,
             )
 
-        return self.callback(result)
+        return result if self.callback is None else self.callback(result)
+
+    def execute(self, context: Context) -> Any:
+        """Airflow runs this method on the worker and defers using the trigger."""
+        if not self.deferrable:
+            super().execute(context)
+        else:
+            self.defer(
+                timeout=self.execution_timeout,
+                trigger=DataflowJobMessagesTrigger(
+                    job_id=self.job_id,
+                    project_id=self.project_id,
+                    location=self.location,
+                    gcp_conn_id=self.gcp_conn_id,
+                    poll_sleep=self.poll_interval,
+                    impersonation_chain=self.impersonation_chain,
+                    fail_on_terminal_state=self.fail_on_terminal_state,
+                ),
+                method_name="execute_complete",
+            )
+
+    def execute_complete(self, context: Context, event: dict[str, str | list]) -> Any:
+        """
+        Execute this method when the task resumes its execution on the worker after deferral.
+
+        If the trigger returns an event with success status - passes the event result to the callback function.
+        Returns the event result if no callback function is provided.
+
+        If the trigger returns an event with error status - raises an exception.
+        """
+        if event["status"] == "success":
+            self.log.info(event["message"])
+            return event["result"] if self.callback is None else self.callback(event["result"])
+        # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+        if self.soft_fail:
+            raise AirflowSkipException(f"Sensor failed with the following message: {event['message']}.")
+        raise AirflowException(f"Sensor failed with the following message: {event['message']}")
+
+    @cached_property
+    def hook(self) -> DataflowHook:
+        return DataflowHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
 
 
 class DataflowJobAutoScalingEventsSensor(BaseSensorOperator):
     """
-    Checks for
+    Checks for autoscaling events associated with a single job in Google Cloud Dataflow.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
         :ref:`howto/operator:DataflowJobAutoScalingEventsSensor`
 
-    :param job_id: ID of the job to be checked.
-    :param callback:
-
-
-
-
-
+    :param job_id: ID of the Dataflow job to be checked.
+    :param callback: a function that can accept a list of serialized autoscaling events.
+        It can do whatever you want it to do. If the callback function is not provided,
+        then on successful completion the task will exit with True value.
+        For more info about the autoscaling event content see:
+        https://cloud.google.com/python/docs/reference/dataflow/latest/google.cloud.dataflow_v1beta3.types.AutoscalingEvent
+    :param fail_on_terminal_state: If set to True the sensor will raise an exception when the job reaches a terminal state.
+        No autoscaling events will be returned.
     :param project_id: Optional, the Google Cloud project ID in which to start a job.
         If set to None or missing, the default project_id from the Google Cloud connection is used.
-    :param location:
+    :param location: The location of the Dataflow job (for example europe-west1).
+        If set to None then the value of DEFAULT_DATAFLOW_LOCATION will be used.
+        See: https://cloud.google.com/dataflow/docs/concepts/regional-endpoints
     :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
     :param impersonation_chain: Optional service account to impersonate using short-term
         credentials, or chained list of accounts required to get the access_token
@@ -305,6 +450,8 @@ class DataflowJobAutoScalingEventsSensor(BaseSensorOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
+    :param deferrable: If True, run the sensor in the deferrable mode.
+    :param poll_interval: Time (seconds) to wait between two consecutive calls to check the job.
     """
 
     template_fields: Sequence[str] = ("job_id",)
@@ -313,12 +460,14 @@ class DataflowJobAutoScalingEventsSensor(BaseSensorOperator):
         self,
         *,
         job_id: str,
-        callback: Callable,
+        callback: Callable | None = None,
         fail_on_terminal_state: bool = True,
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        poll_interval: int = 60,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -329,14 +478,10 @@ class DataflowJobAutoScalingEventsSensor(BaseSensorOperator):
         self.location = location
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
-        self.
+        self.deferrable = deferrable
+        self.poll_interval = poll_interval
 
     def poke(self, context: Context) -> bool:
-        self.hook = DataflowHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
-        )
-
         if self.fail_on_terminal_state:
             job = self.hook.get_job(
                 job_id=self.job_id,
@@ -357,4 +502,46 @@
                 location=self.location,
             )
 
-        return self.callback(result)
+        return result if self.callback is None else self.callback(result)
+
+    def execute(self, context: Context) -> Any:
+        """Airflow runs this method on the worker and defers using the trigger."""
+        if not self.deferrable:
+            super().execute(context)
+        else:
+            self.defer(
+                trigger=DataflowJobAutoScalingEventTrigger(
+                    job_id=self.job_id,
+                    project_id=self.project_id,
+                    location=self.location,
+                    gcp_conn_id=self.gcp_conn_id,
+                    poll_sleep=self.poll_interval,
+                    impersonation_chain=self.impersonation_chain,
+                    fail_on_terminal_state=self.fail_on_terminal_state,
+                ),
+                method_name="execute_complete",
+            )
+
+    def execute_complete(self, context: Context, event: dict[str, str | list]) -> Any:
+        """
+        Execute this method when the task resumes its execution on the worker after deferral.
+
+        If the trigger returns an event with success status - passes the event result to the callback function.
+        Returns the event result if no callback function is provided.
+
+        If the trigger returns an event with error status - raises an exception.
+        """
+        if event["status"] == "success":
+            self.log.info(event["message"])
+            return event["result"] if self.callback is None else self.callback(event["result"])
+        # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+        if self.soft_fail:
+            raise AirflowSkipException(f"Sensor failed with the following message: {event['message']}.")
+        raise AirflowException(f"Sensor failed with the following message: {event['message']}")
+
+    @cached_property
+    def hook(self) -> DataflowHook:
+        return DataflowHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
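A minimal usage sketch (not part of the package; the job ID, project, and region below are placeholders) showing the new deferrable mode and the now-optional callback on the reworked Dataflow sensors:

    from __future__ import annotations

    from datetime import datetime

    from airflow.models.dag import DAG
    from airflow.providers.google.cloud.hooks.dataflow import DataflowJobStatus
    from airflow.providers.google.cloud.sensors.dataflow import (
        DataflowJobMetricsSensor,
        DataflowJobStatusSensor,
    )


    def check_metrics(metrics: list) -> bool:
        # Receives the serialized metric updates; succeed once anything was reported.
        return len(metrics) > 0


    with DAG("example_dataflow_sensors", start_date=datetime(2024, 1, 1), schedule=None):
        wait_for_done = DataflowJobStatusSensor(
            task_id="wait_for_done",
            job_id="example-job-id",          # placeholder
            expected_statuses={DataflowJobStatus.JOB_STATE_DONE},
            project_id="example-project",     # placeholder
            location="europe-west3",
            deferrable=True,                  # wait in the triggerer, not on a worker slot
            poll_interval=30,
        )

        wait_for_metrics = DataflowJobMetricsSensor(
            task_id="wait_for_metrics",
            job_id="example-job-id",          # placeholder
            callback=check_metrics,           # optional as of this release
            fail_on_terminal_state=False,
            project_id="example-project",     # placeholder
            location="europe-west3",
            deferrable=True,
        )

        wait_for_done >> wait_for_metrics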
airflow/providers/google/cloud/sensors/datafusion.py

@@ -23,6 +23,7 @@ from typing import TYPE_CHECKING, Iterable, Sequence
 
 from airflow.exceptions import AirflowException, AirflowNotFoundException, AirflowSkipException
 from airflow.providers.google.cloud.hooks.datafusion import DataFusionHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
@@ -65,7 +66,7 @@ class CloudDataFusionPipelineStateSensor(BaseSensorOperator):
         instance_name: str,
         location: str,
         failure_statuses: Iterable[str] | None = None,
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         namespace: str = "default",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
airflow/providers/google/cloud/sensors/dataproc.py

@@ -27,6 +27,7 @@ from google.cloud.dataproc_v1.types import Batch, JobStatus
 
 from airflow.exceptions import AirflowException, AirflowSkipException
 from airflow.providers.google.cloud.hooks.dataproc import DataprocHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
@@ -53,7 +54,7 @@ class DataprocJobSensor(BaseSensorOperator):
         *,
         dataproc_job_id: str,
         region: str,
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         wait_timeout: int | None = None,
         **kwargs,
@@ -144,7 +145,7 @@ class DataprocBatchSensor(BaseSensorOperator):
         *,
         batch_id: str,
         region: str,
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         wait_timeout: int | None = None,
         **kwargs,
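A recurring change in this release, visible in the Dataproc sensors above and in several other constructors throughout this diff, is giving `project_id` the `PROVIDE_PROJECT_ID` default from `airflow.providers.google.common.hooks.base_google` instead of a bare annotation. A rough sketch of the idea behind such a typed sentinel follows; it illustrates the pattern only and is not the provider's literal code, and the `resolve_project_id` helper is hypothetical (the provider routes this through its `fallback_to_default_project_id` hook decorator):

    from __future__ import annotations

    from typing import cast

    # Sentinel typed as str so signatures can stay `project_id: str` while callers omit it.
    PROVIDE_PROJECT_ID: str = cast(str, None)


    def resolve_project_id(project_id: str, connection_default: str | None) -> str:
        """Hypothetical helper showing the fallback behaviour."""
        if project_id is not None:  # an explicit project was passed
            return project_id
        if connection_default:  # fall back to the Google Cloud connection's project
            return connection_default
        raise ValueError("The project id must be passed either as a parameter or via the connection.")


    print(resolve_project_id(PROVIDE_PROJECT_ID, "my-default-project"))  # -> my-default-project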
airflow/providers/google/cloud/sensors/gcs.py

@@ -89,7 +89,7 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
         self.object = object
         self.use_glob = use_glob
         self.google_cloud_conn_id = google_cloud_conn_id
-        self._matches:
+        self._matches: bool = False
         self.impersonation_chain = impersonation_chain
         self.retry = retry
 
@@ -101,17 +101,16 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
             gcp_conn_id=self.google_cloud_conn_id,
             impersonation_chain=self.impersonation_chain,
         )
-
-
-
-
-
+        self._matches = (
+            bool(hook.list(self.bucket, match_glob=self.object))
+            if self.use_glob
+            else hook.exists(self.bucket, self.object, self.retry)
+        )
+        return self._matches
 
-    def execute(self, context: Context)
+    def execute(self, context: Context):
         """Airflow runs this method on the worker and defers using the trigger."""
-        if
-            super().execute(context)
-        else:
+        if self.deferrable:
             if not self.poke(context=context):
                 self.defer(
                     timeout=timedelta(seconds=self.timeout),
@@ -127,8 +126,11 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
                     ),
                     method_name="execute_complete",
                 )
+        else:
+            super().execute(context)
+        return self._matches
 
-    def execute_complete(self, context: Context, event: dict[str, str]) ->
+    def execute_complete(self, context: Context, event: dict[str, str]) -> bool:
         """
         Act as a callback for when the trigger fires - returns immediately.
 
@@ -140,7 +142,7 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
                 raise AirflowSkipException(event["message"])
             raise AirflowException(event["message"])
         self.log.info("File %s was found in bucket %s.", self.object, self.bucket)
-        return
+        return True
 
 
 @deprecated(
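With the changes above, `GCSObjectExistenceSensor.execute()` now also returns the boolean match result (`self._matches`), and glob matching goes through `hook.list(..., match_glob=...)`. A short usage sketch, where the bucket and object pattern are placeholders:

    from __future__ import annotations

    from datetime import datetime

    from airflow.models.dag import DAG
    from airflow.providers.google.cloud.sensors.gcs import GCSObjectExistenceSensor

    with DAG("example_gcs_sensor", start_date=datetime(2024, 1, 1), schedule=None):
        wait_for_report = GCSObjectExistenceSensor(
            task_id="wait_for_report",
            bucket="example-bucket",                   # placeholder
            object="reports/2024/*/daily_report.csv",  # interpreted as a glob pattern
            use_glob=True,
            deferrable=True,                           # wait in the triggerer
        )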
airflow/providers/google/cloud/sensors/tasks.py

@@ -22,6 +22,7 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Sequence
 
 from airflow.providers.google.cloud.hooks.tasks import CloudTasksHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
@@ -57,7 +58,7 @@ class TaskQueueEmptySensor(BaseSensorOperator):
         self,
         *,
         location: str,
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         queue_name: str | None = None,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
airflow/providers/google/cloud/sensors/workflows.py

@@ -23,6 +23,7 @@ from google.cloud.workflows.executions_v1beta import Execution
 
 from airflow.exceptions import AirflowException, AirflowSkipException
 from airflow.providers.google.cloud.hooks.workflows import WorkflowsHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
@@ -58,7 +59,7 @@ class WorkflowExecutionSensor(BaseSensorOperator):
         workflow_id: str,
         execution_id: str,
         location: str,
-        project_id: str
+        project_id: str = PROVIDE_PROJECT_ID,
         success_states: set[Execution.State] | None = None,
         failure_states: set[Execution.State] | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
airflow/providers/google/cloud/transfers/adls_to_gcs.py

@@ -24,8 +24,14 @@ from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Sequence
 
 from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
-
-
+
+try:
+    from airflow.providers.microsoft.azure.hooks.data_lake import AzureDataLakeHook
+    from airflow.providers.microsoft.azure.operators.adls import ADLSListOperator
+except ModuleNotFoundError as e:
+    from airflow.exceptions import AirflowOptionalProviderFeatureException
+
+    raise AirflowOptionalProviderFeatureException(e)
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py

@@ -22,7 +22,13 @@ from typing import TYPE_CHECKING, Sequence
 
 from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
-
+
+try:
+    from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
+except ModuleNotFoundError as e:
+    from airflow.exceptions import AirflowOptionalProviderFeatureException
+
+    raise AirflowOptionalProviderFeatureException(e)
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py

@@ -24,7 +24,13 @@ from typing import TYPE_CHECKING, Sequence
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url, gcs_object_is_directory
-
+
+try:
+    from airflow.providers.microsoft.azure.hooks.fileshare import AzureFileShareHook
+except ModuleNotFoundError as e:
+    from airflow.exceptions import AirflowOptionalProviderFeatureException
+
+    raise AirflowOptionalProviderFeatureException(e)
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
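All three Azure-to-GCS transfer modules above now place their Microsoft Azure imports behind the same guard, so a missing optional dependency surfaces as `AirflowOptionalProviderFeatureException` at import time rather than a raw `ModuleNotFoundError`. An illustrative sketch of what calling code sees; the extra name in the comment is an assumption based on the provider's cross-dependency extras:

    # Illustrative only: importing a guarded transfer module without the Microsoft
    # Azure provider installed now raises a provider-specific exception.
    from airflow.exceptions import AirflowOptionalProviderFeatureException

    try:
        from airflow.providers.google.cloud.transfers.azure_blob_to_gcs import (
            AzureBlobStorageToGCSOperator,
        )
    except AirflowOptionalProviderFeatureException:
        # Assumed fix: install the cross-provider extra, e.g.
        #   pip install "apache-airflow-providers-google[microsoft.azure]"
        # or install apache-airflow-providers-microsoft-azure directly.
        raise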