apache-airflow-providers-google 10.17.0rc1__py3-none-any.whl → 10.18.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. airflow/providers/google/__init__.py +3 -3
  2. airflow/providers/google/cloud/hooks/automl.py +1 -1
  3. airflow/providers/google/cloud/hooks/bigquery.py +64 -33
  4. airflow/providers/google/cloud/hooks/cloud_composer.py +250 -2
  5. airflow/providers/google/cloud/hooks/cloud_sql.py +154 -7
  6. airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +7 -2
  7. airflow/providers/google/cloud/hooks/compute_ssh.py +2 -1
  8. airflow/providers/google/cloud/hooks/dataflow.py +246 -32
  9. airflow/providers/google/cloud/hooks/dataplex.py +6 -2
  10. airflow/providers/google/cloud/hooks/dlp.py +14 -14
  11. airflow/providers/google/cloud/hooks/gcs.py +6 -2
  12. airflow/providers/google/cloud/hooks/gdm.py +2 -2
  13. airflow/providers/google/cloud/hooks/kubernetes_engine.py +2 -2
  14. airflow/providers/google/cloud/hooks/mlengine.py +8 -4
  15. airflow/providers/google/cloud/hooks/pubsub.py +1 -1
  16. airflow/providers/google/cloud/hooks/secret_manager.py +252 -4
  17. airflow/providers/google/cloud/hooks/vertex_ai/custom_job.py +1431 -74
  18. airflow/providers/google/cloud/links/vertex_ai.py +2 -1
  19. airflow/providers/google/cloud/log/gcs_task_handler.py +2 -1
  20. airflow/providers/google/cloud/operators/automl.py +13 -12
  21. airflow/providers/google/cloud/operators/bigquery.py +36 -22
  22. airflow/providers/google/cloud/operators/bigquery_dts.py +4 -3
  23. airflow/providers/google/cloud/operators/bigtable.py +7 -6
  24. airflow/providers/google/cloud/operators/cloud_build.py +12 -11
  25. airflow/providers/google/cloud/operators/cloud_composer.py +147 -2
  26. airflow/providers/google/cloud/operators/cloud_memorystore.py +17 -16
  27. airflow/providers/google/cloud/operators/cloud_sql.py +60 -17
  28. airflow/providers/google/cloud/operators/cloud_storage_transfer_service.py +35 -16
  29. airflow/providers/google/cloud/operators/compute.py +12 -11
  30. airflow/providers/google/cloud/operators/datacatalog.py +21 -20
  31. airflow/providers/google/cloud/operators/dataflow.py +59 -42
  32. airflow/providers/google/cloud/operators/datafusion.py +11 -10
  33. airflow/providers/google/cloud/operators/datapipeline.py +3 -2
  34. airflow/providers/google/cloud/operators/dataprep.py +5 -4
  35. airflow/providers/google/cloud/operators/dataproc.py +19 -16
  36. airflow/providers/google/cloud/operators/datastore.py +8 -7
  37. airflow/providers/google/cloud/operators/dlp.py +31 -30
  38. airflow/providers/google/cloud/operators/functions.py +4 -3
  39. airflow/providers/google/cloud/operators/gcs.py +66 -41
  40. airflow/providers/google/cloud/operators/kubernetes_engine.py +232 -12
  41. airflow/providers/google/cloud/operators/life_sciences.py +2 -1
  42. airflow/providers/google/cloud/operators/mlengine.py +11 -10
  43. airflow/providers/google/cloud/operators/pubsub.py +6 -5
  44. airflow/providers/google/cloud/operators/spanner.py +7 -6
  45. airflow/providers/google/cloud/operators/speech_to_text.py +2 -1
  46. airflow/providers/google/cloud/operators/stackdriver.py +11 -10
  47. airflow/providers/google/cloud/operators/tasks.py +14 -13
  48. airflow/providers/google/cloud/operators/text_to_speech.py +2 -1
  49. airflow/providers/google/cloud/operators/translate_speech.py +2 -1
  50. airflow/providers/google/cloud/operators/vertex_ai/custom_job.py +333 -26
  51. airflow/providers/google/cloud/operators/vertex_ai/generative_model.py +20 -12
  52. airflow/providers/google/cloud/operators/vertex_ai/pipeline_job.py +0 -1
  53. airflow/providers/google/cloud/operators/vision.py +13 -12
  54. airflow/providers/google/cloud/operators/workflows.py +10 -9
  55. airflow/providers/google/cloud/secrets/secret_manager.py +2 -1
  56. airflow/providers/google/cloud/sensors/bigquery_dts.py +2 -1
  57. airflow/providers/google/cloud/sensors/bigtable.py +2 -1
  58. airflow/providers/google/cloud/sensors/cloud_storage_transfer_service.py +2 -1
  59. airflow/providers/google/cloud/sensors/dataflow.py +239 -52
  60. airflow/providers/google/cloud/sensors/datafusion.py +2 -1
  61. airflow/providers/google/cloud/sensors/dataproc.py +3 -2
  62. airflow/providers/google/cloud/sensors/gcs.py +14 -12
  63. airflow/providers/google/cloud/sensors/tasks.py +2 -1
  64. airflow/providers/google/cloud/sensors/workflows.py +2 -1
  65. airflow/providers/google/cloud/transfers/adls_to_gcs.py +8 -2
  66. airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py +7 -1
  67. airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py +7 -1
  68. airflow/providers/google/cloud/transfers/bigquery_to_gcs.py +2 -1
  69. airflow/providers/google/cloud/transfers/bigquery_to_mssql.py +1 -1
  70. airflow/providers/google/cloud/transfers/bigquery_to_sql.py +1 -0
  71. airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +5 -6
  72. airflow/providers/google/cloud/transfers/gcs_to_gcs.py +22 -12
  73. airflow/providers/google/cloud/triggers/bigquery.py +14 -3
  74. airflow/providers/google/cloud/triggers/cloud_composer.py +68 -0
  75. airflow/providers/google/cloud/triggers/cloud_sql.py +2 -1
  76. airflow/providers/google/cloud/triggers/cloud_storage_transfer_service.py +2 -1
  77. airflow/providers/google/cloud/triggers/dataflow.py +504 -4
  78. airflow/providers/google/cloud/triggers/dataproc.py +110 -26
  79. airflow/providers/google/cloud/triggers/mlengine.py +2 -1
  80. airflow/providers/google/cloud/triggers/vertex_ai.py +94 -0
  81. airflow/providers/google/common/hooks/base_google.py +45 -7
  82. airflow/providers/google/firebase/hooks/firestore.py +2 -2
  83. airflow/providers/google/firebase/operators/firestore.py +2 -1
  84. airflow/providers/google/get_provider_info.py +3 -2
  85. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/METADATA +8 -8
  86. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/RECORD +88 -89
  87. airflow/providers/google/cloud/example_dags/example_cloud_sql_query.py +0 -289
  88. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/WHEEL +0 -0
  89. {apache_airflow_providers_google-10.17.0rc1.dist-info → apache_airflow_providers_google-10.18.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/google/cloud/sensors/dataflow.py

@@ -19,14 +19,23 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Callable, Sequence
+from functools import cached_property
+from typing import TYPE_CHECKING, Any, Callable, Sequence
 
+from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowSkipException
 from airflow.providers.google.cloud.hooks.dataflow import (
     DEFAULT_DATAFLOW_LOCATION,
     DataflowHook,
     DataflowJobStatus,
 )
+from airflow.providers.google.cloud.triggers.dataflow import (
+    DataflowJobAutoScalingEventTrigger,
+    DataflowJobMessagesTrigger,
+    DataflowJobMetricsTrigger,
+    DataflowJobStatusTrigger,
+)
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
@@ -42,7 +51,7 @@ class DataflowJobStatusSensor(BaseSensorOperator):
         :ref:`howto/operator:DataflowJobStatusSensor`
 
     :param job_id: ID of the job to be checked.
-    :param expected_statuses: The expected state of the operation.
+    :param expected_statuses: The expected state(s) of the operation.
         See:
         https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState
     :param project_id: Optional, the Google Cloud project ID in which to start a job.
@@ -58,6 +67,8 @@ class DataflowJobStatusSensor(BaseSensorOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
+    :param deferrable: If True, run the sensor in the deferrable mode.
+    :param poll_interval: Time (seconds) to wait between two consecutive calls to check the job.
     """
 
     template_fields: Sequence[str] = ("job_id",)
@@ -67,10 +78,12 @@ class DataflowJobStatusSensor(BaseSensorOperator):
         *,
         job_id: str,
         expected_statuses: set[str] | str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        poll_interval: int = 10,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -82,7 +95,8 @@ class DataflowJobStatusSensor(BaseSensorOperator):
         self.location = location
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
-        self.hook: DataflowHook | None = None
+        self.deferrable = deferrable
+        self.poll_interval = poll_interval
 
     def poke(self, context: Context) -> bool:
         self.log.info(
@@ -90,10 +104,6 @@ class DataflowJobStatusSensor(BaseSensorOperator):
             self.job_id,
             ", ".join(self.expected_statuses),
         )
-        self.hook = DataflowHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
-        )
 
         job = self.hook.get_job(
             job_id=self.job_id,
@@ -115,10 +125,51 @@ class DataflowJobStatusSensor(BaseSensorOperator):
 
         return False
 
+    def execute(self, context: Context) -> None:
+        """Airflow runs this method on the worker and defers using the trigger."""
+        if not self.deferrable:
+            super().execute(context)
+        elif not self.poke(context=context):
+            self.defer(
+                timeout=self.execution_timeout,
+                trigger=DataflowJobStatusTrigger(
+                    job_id=self.job_id,
+                    expected_statuses=self.expected_statuses,
+                    project_id=self.project_id,
+                    location=self.location,
+                    gcp_conn_id=self.gcp_conn_id,
+                    poll_sleep=self.poll_interval,
+                    impersonation_chain=self.impersonation_chain,
+                ),
+                method_name="execute_complete",
+            )
+
+    def execute_complete(self, context: Context, event: dict[str, str | list]) -> bool:
+        """
+        Execute this method when the task resumes its execution on the worker after deferral.
+
+        Returns True if the trigger returns an event with the success status, otherwise raises
+        an exception.
+        """
+        if event["status"] == "success":
+            self.log.info(event["message"])
+            return True
+        # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+        if self.soft_fail:
+            raise AirflowSkipException(f"Sensor failed with the following message: {event['message']}.")
+        raise AirflowException(f"Sensor failed with the following message: {event['message']}")
+
+    @cached_property
+    def hook(self) -> DataflowHook:
+        return DataflowHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
+
 
 class DataflowJobMetricsSensor(BaseSensorOperator):
     """
-    Checks the metrics of a job in Google Cloud Dataflow.
+    Checks for metrics associated with a single job in Google Cloud Dataflow.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -143,6 +194,9 @@ class DataflowJobMetricsSensor(BaseSensorOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
+    :param deferrable: If True, run the sensor in the deferrable mode.
+    :param poll_interval: Time (seconds) to wait between two consecutive calls to check the job.
+
     """
 
     template_fields: Sequence[str] = ("job_id",)
@@ -151,12 +205,14 @@ class DataflowJobMetricsSensor(BaseSensorOperator):
         self,
         *,
         job_id: str,
-        callback: Callable[[dict], bool],
+        callback: Callable | None = None,
         fail_on_terminal_state: bool = True,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        poll_interval: int = 10,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -167,14 +223,10 @@ class DataflowJobMetricsSensor(BaseSensorOperator):
         self.location = location
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
-        self.hook: DataflowHook | None = None
+        self.deferrable = deferrable
+        self.poll_interval = poll_interval
 
     def poke(self, context: Context) -> bool:
-        self.hook = DataflowHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
-        )
-
         if self.fail_on_terminal_state:
             job = self.hook.get_job(
                 job_id=self.job_id,
@@ -194,27 +246,73 @@ class DataflowJobMetricsSensor(BaseSensorOperator):
             project_id=self.project_id,
             location=self.location,
         )
+        return result["metrics"] if self.callback is None else self.callback(result["metrics"])
+
+    def execute(self, context: Context) -> Any:
+        """Airflow runs this method on the worker and defers using the trigger."""
+        if not self.deferrable:
+            super().execute(context)
+        else:
+            self.defer(
+                timeout=self.execution_timeout,
+                trigger=DataflowJobMetricsTrigger(
+                    job_id=self.job_id,
+                    project_id=self.project_id,
+                    location=self.location,
+                    gcp_conn_id=self.gcp_conn_id,
+                    poll_sleep=self.poll_interval,
+                    impersonation_chain=self.impersonation_chain,
+                    fail_on_terminal_state=self.fail_on_terminal_state,
+                ),
+                method_name="execute_complete",
+            )
 
-        return self.callback(result["metrics"])
+    def execute_complete(self, context: Context, event: dict[str, str | list]) -> Any:
+        """
+        Execute this method when the task resumes its execution on the worker after deferral.
+
+        If the trigger returns an event with success status - passes the event result to the callback function.
+        Returns the event result if no callback function is provided.
+
+        If the trigger returns an event with error status - raises an exception.
+        """
+        if event["status"] == "success":
+            self.log.info(event["message"])
+            return event["result"] if self.callback is None else self.callback(event["result"])
+        # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+        if self.soft_fail:
+            raise AirflowSkipException(f"Sensor failed with the following message: {event['message']}.")
+        raise AirflowException(f"Sensor failed with the following message: {event['message']}")
+
+    @cached_property
+    def hook(self) -> DataflowHook:
+        return DataflowHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
 
 
 class DataflowJobMessagesSensor(BaseSensorOperator):
     """
-    Checks for the job message in Google Cloud Dataflow.
+    Checks for job messages associated with a single job in Google Cloud Dataflow.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
         :ref:`howto/operator:DataflowJobMessagesSensor`
 
-    :param job_id: ID of the job to be checked.
-    :param callback: callback which is called with list of read job metrics
-        See:
-        https://cloud.google.com/dataflow/docs/reference/rest/v1b3/MetricUpdate
-    :param fail_on_terminal_state: If set to true sensor will raise Exception when
-        job is in terminal state
+    :param job_id: ID of the Dataflow job to be checked.
+    :param callback: a function that can accept a list of serialized job messages.
+        It can do whatever you want it to do. If the callback function is not provided,
+        then on successful completion the task will exit with True value.
+        For more info about the job message content see:
+        https://cloud.google.com/python/docs/reference/dataflow/latest/google.cloud.dataflow_v1beta3.types.JobMessage
+    :param fail_on_terminal_state: If set to True the sensor will raise an exception when the job reaches a terminal state.
+        No job messages will be returned.
     :param project_id: Optional, the Google Cloud project ID in which to start a job.
         If set to None or missing, the default project_id from the Google Cloud connection is used.
-    :param location: Job location.
+    :param location: The location of the Dataflow job (for example europe-west1).
+        If set to None then the value of DEFAULT_DATAFLOW_LOCATION will be used.
+        See: https://cloud.google.com/dataflow/docs/concepts/regional-endpoints
     :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
     :param impersonation_chain: Optional service account to impersonate using short-term
         credentials, or chained list of accounts required to get the access_token
@@ -224,6 +322,8 @@ class DataflowJobMessagesSensor(BaseSensorOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
+    :param deferrable: If True, run the sensor in the deferrable mode.
+    :param poll_interval: Time (seconds) to wait between two consecutive calls to check the job.
     """
 
     template_fields: Sequence[str] = ("job_id",)
@@ -232,12 +332,14 @@ class DataflowJobMessagesSensor(BaseSensorOperator):
         self,
         *,
         job_id: str,
-        callback: Callable,
+        callback: Callable | None = None,
        fail_on_terminal_state: bool = True,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        poll_interval: int = 10,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -248,14 +350,10 @@ class DataflowJobMessagesSensor(BaseSensorOperator):
         self.location = location
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
-        self.hook: DataflowHook | None = None
+        self.deferrable = deferrable
+        self.poll_interval = poll_interval
 
     def poke(self, context: Context) -> bool:
-        self.hook = DataflowHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
-        )
-
         if self.fail_on_terminal_state:
             job = self.hook.get_job(
                 job_id=self.job_id,
@@ -276,26 +374,73 @@ class DataflowJobMessagesSensor(BaseSensorOperator):
             location=self.location,
         )
 
-        return self.callback(result)
+        return result if self.callback is None else self.callback(result)
+
+    def execute(self, context: Context) -> Any:
+        """Airflow runs this method on the worker and defers using the trigger."""
+        if not self.deferrable:
+            super().execute(context)
+        else:
+            self.defer(
+                timeout=self.execution_timeout,
+                trigger=DataflowJobMessagesTrigger(
+                    job_id=self.job_id,
+                    project_id=self.project_id,
+                    location=self.location,
+                    gcp_conn_id=self.gcp_conn_id,
+                    poll_sleep=self.poll_interval,
+                    impersonation_chain=self.impersonation_chain,
+                    fail_on_terminal_state=self.fail_on_terminal_state,
+                ),
+                method_name="execute_complete",
+            )
+
+    def execute_complete(self, context: Context, event: dict[str, str | list]) -> Any:
+        """
+        Execute this method when the task resumes its execution on the worker after deferral.
+
+        If the trigger returns an event with success status - passes the event result to the callback function.
+        Returns the event result if no callback function is provided.
+
+        If the trigger returns an event with error status - raises an exception.
+        """
+        if event["status"] == "success":
+            self.log.info(event["message"])
+            return event["result"] if self.callback is None else self.callback(event["result"])
+        # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+        if self.soft_fail:
+            raise AirflowSkipException(f"Sensor failed with the following message: {event['message']}.")
+        raise AirflowException(f"Sensor failed with the following message: {event['message']}")
+
+    @cached_property
+    def hook(self) -> DataflowHook:
+        return DataflowHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
 
 
 class DataflowJobAutoScalingEventsSensor(BaseSensorOperator):
     """
-    Checks for the job autoscaling event in Google Cloud Dataflow.
+    Checks for autoscaling events associated with a single job in Google Cloud Dataflow.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
         :ref:`howto/operator:DataflowJobAutoScalingEventsSensor`
 
-    :param job_id: ID of the job to be checked.
-    :param callback: callback which is called with list of read job metrics
-        See:
-        https://cloud.google.com/dataflow/docs/reference/rest/v1b3/MetricUpdate
-    :param fail_on_terminal_state: If set to true sensor will raise Exception when
-        job is in terminal state
+    :param job_id: ID of the Dataflow job to be checked.
+    :param callback: a function that can accept a list of serialized autoscaling events.
+        It can do whatever you want it to do. If the callback function is not provided,
+        then on successful completion the task will exit with True value.
+        For more info about the autoscaling event content see:
+        https://cloud.google.com/python/docs/reference/dataflow/latest/google.cloud.dataflow_v1beta3.types.AutoscalingEvent
+    :param fail_on_terminal_state: If set to True the sensor will raise an exception when the job reaches a terminal state.
+        No autoscaling events will be returned.
     :param project_id: Optional, the Google Cloud project ID in which to start a job.
         If set to None or missing, the default project_id from the Google Cloud connection is used.
-    :param location: Job location.
+    :param location: The location of the Dataflow job (for example europe-west1).
+        If set to None then the value of DEFAULT_DATAFLOW_LOCATION will be used.
+        See: https://cloud.google.com/dataflow/docs/concepts/regional-endpoints
     :param gcp_conn_id: The connection ID to use connecting to Google Cloud.
     :param impersonation_chain: Optional service account to impersonate using short-term
         credentials, or chained list of accounts required to get the access_token
@@ -305,6 +450,8 @@ class DataflowJobAutoScalingEventsSensor(BaseSensorOperator):
         If set as a sequence, the identities from the list must grant
         Service Account Token Creator IAM role to the directly preceding identity, with first
         account from the list granting this role to the originating account (templated).
+    :param deferrable: If True, run the sensor in the deferrable mode.
+    :param poll_interval: Time (seconds) to wait between two consecutive calls to check the job.
     """
 
     template_fields: Sequence[str] = ("job_id",)
@@ -313,12 +460,14 @@ class DataflowJobAutoScalingEventsSensor(BaseSensorOperator):
         self,
         *,
         job_id: str,
-        callback: Callable,
+        callback: Callable | None = None,
        fail_on_terminal_state: bool = True,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         location: str = DEFAULT_DATAFLOW_LOCATION,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        poll_interval: int = 60,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -329,14 +478,10 @@ class DataflowJobAutoScalingEventsSensor(BaseSensorOperator):
         self.location = location
         self.gcp_conn_id = gcp_conn_id
         self.impersonation_chain = impersonation_chain
-        self.hook: DataflowHook | None = None
+        self.deferrable = deferrable
+        self.poll_interval = poll_interval
 
     def poke(self, context: Context) -> bool:
-        self.hook = DataflowHook(
-            gcp_conn_id=self.gcp_conn_id,
-            impersonation_chain=self.impersonation_chain,
-        )
-
         if self.fail_on_terminal_state:
             job = self.hook.get_job(
                 job_id=self.job_id,
@@ -357,4 +502,46 @@ class DataflowJobAutoScalingEventsSensor(BaseSensorOperator):
             location=self.location,
         )
 
-        return self.callback(result)
+        return result if self.callback is None else self.callback(result)
+
+    def execute(self, context: Context) -> Any:
+        """Airflow runs this method on the worker and defers using the trigger."""
+        if not self.deferrable:
+            super().execute(context)
+        else:
+            self.defer(
+                trigger=DataflowJobAutoScalingEventTrigger(
+                    job_id=self.job_id,
+                    project_id=self.project_id,
+                    location=self.location,
+                    gcp_conn_id=self.gcp_conn_id,
+                    poll_sleep=self.poll_interval,
+                    impersonation_chain=self.impersonation_chain,
+                    fail_on_terminal_state=self.fail_on_terminal_state,
+                ),
+                method_name="execute_complete",
+            )
+
+    def execute_complete(self, context: Context, event: dict[str, str | list]) -> Any:
+        """
+        Execute this method when the task resumes its execution on the worker after deferral.
+
+        If the trigger returns an event with success status - passes the event result to the callback function.
+        Returns the event result if no callback function is provided.
+
+        If the trigger returns an event with error status - raises an exception.
+        """
+        if event["status"] == "success":
+            self.log.info(event["message"])
+            return event["result"] if self.callback is None else self.callback(event["result"])
+        # TODO: remove this if check when min_airflow_version is set to higher than 2.7.1
+        if self.soft_fail:
+            raise AirflowSkipException(f"Sensor failed with the following message: {event['message']}.")
+        raise AirflowException(f"Sensor failed with the following message: {event['message']}")
+
+    @cached_property
+    def hook(self) -> DataflowHook:
+        return DataflowHook(
+            gcp_conn_id=self.gcp_conn_id,
+            impersonation_chain=self.impersonation_chain,
+        )
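
The sensors above gain an optional deferrable mode backed by the new Dataflow triggers, and their callback arguments become optional. As a rough usage sketch only (not part of this release; the DAG id, job ID, and location below are placeholders):

from __future__ import annotations

from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.google.cloud.hooks.dataflow import DataflowJobStatus
from airflow.providers.google.cloud.sensors.dataflow import (
    DataflowJobMetricsSensor,
    DataflowJobStatusSensor,
)

with DAG("example_dataflow_sensors", start_date=datetime(2024, 1, 1), schedule=None):
    # Placeholder job ID; in practice it usually comes from the operator that started the job.
    job_id = "2024-01-01_00_00_00-1234567890123456789"

    # With deferrable=True the sensor frees its worker slot and waits on
    # DataflowJobStatusTrigger, polling every poll_interval seconds.
    wait_for_done = DataflowJobStatusSensor(
        task_id="wait_for_job_done",
        job_id=job_id,
        expected_statuses={DataflowJobStatus.JOB_STATE_DONE},
        location="europe-west1",
        deferrable=True,
        poll_interval=30,
    )

    # callback is now optional: without it, the serialized metrics are returned
    # (and therefore pushed to XCom) once the sensor succeeds.
    wait_for_metrics = DataflowJobMetricsSensor(
        task_id="wait_for_job_metrics",
        job_id=job_id,
        fail_on_terminal_state=False,
        deferrable=True,
    )

    wait_for_done >> wait_for_metrics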
airflow/providers/google/cloud/sensors/datafusion.py

@@ -23,6 +23,7 @@ from typing import TYPE_CHECKING, Iterable, Sequence
 
 from airflow.exceptions import AirflowException, AirflowNotFoundException, AirflowSkipException
 from airflow.providers.google.cloud.hooks.datafusion import DataFusionHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
@@ -65,7 +66,7 @@ class CloudDataFusionPipelineStateSensor(BaseSensorOperator):
         instance_name: str,
         location: str,
         failure_statuses: Iterable[str] | None = None,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         namespace: str = "default",
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
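
The recurring signature change from `project_id: str | None = None` to `project_id: str = PROVIDE_PROJECT_ID` (here and in the sensors below) keeps the runtime fallback to the connection's default project while letting the signature advertise a plain `str`. A minimal sketch of the idea, assuming the sentinel is simply a None value typed as str; the helper below is illustrative, not provider code:

from typing import cast

# Assumption: the sentinel mirrors base_google.PROVIDE_PROJECT_ID, a None value
# annotated as str so that callers may omit project_id without tripping type checks.
PROVIDE_PROJECT_ID: str = cast(str, None)


def fetch_job(job_id: str, project_id: str = PROVIDE_PROJECT_ID) -> dict:
    # Illustrative fallback: a real hook substitutes the default project
    # configured on the Google Cloud connection when project_id is not given.
    effective_project = project_id or "default-project-from-connection"
    return {"job_id": job_id, "project_id": effective_project}


print(fetch_job("job-123"))  # falls back to the connection's default project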
airflow/providers/google/cloud/sensors/dataproc.py

@@ -27,6 +27,7 @@ from google.cloud.dataproc_v1.types import Batch, JobStatus
 
 from airflow.exceptions import AirflowException, AirflowSkipException
 from airflow.providers.google.cloud.hooks.dataproc import DataprocHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
@@ -53,7 +54,7 @@ class DataprocJobSensor(BaseSensorOperator):
         *,
         dataproc_job_id: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         wait_timeout: int | None = None,
         **kwargs,
@@ -144,7 +145,7 @@ class DataprocBatchSensor(BaseSensorOperator):
         *,
         batch_id: str,
         region: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         gcp_conn_id: str = "google_cloud_default",
         wait_timeout: int | None = None,
         **kwargs,
airflow/providers/google/cloud/sensors/gcs.py

@@ -89,7 +89,7 @@ class GCSObjectExistenceSensor(BaseSensorOperator):
         self.object = object
         self.use_glob = use_glob
         self.google_cloud_conn_id = google_cloud_conn_id
-        self._matches: list[str] = []
+        self._matches: bool = False
         self.impersonation_chain = impersonation_chain
         self.retry = retry
 
@@ -101,17 +101,16 @@
             gcp_conn_id=self.google_cloud_conn_id,
             impersonation_chain=self.impersonation_chain,
         )
-        if self.use_glob:
-            self._matches = hook.list(self.bucket, match_glob=self.object)
-            return bool(self._matches)
-        else:
-            return hook.exists(self.bucket, self.object, self.retry)
+        self._matches = (
+            bool(hook.list(self.bucket, match_glob=self.object))
+            if self.use_glob
+            else hook.exists(self.bucket, self.object, self.retry)
+        )
+        return self._matches
 
-    def execute(self, context: Context) -> None:
+    def execute(self, context: Context):
         """Airflow runs this method on the worker and defers using the trigger."""
-        if not self.deferrable:
-            super().execute(context)
-        else:
+        if self.deferrable:
             if not self.poke(context=context):
                 self.defer(
                     timeout=timedelta(seconds=self.timeout),
@@ -127,8 +126,11 @@
                     ),
                     method_name="execute_complete",
                 )
+        else:
+            super().execute(context)
+        return self._matches
 
-    def execute_complete(self, context: Context, event: dict[str, str]) -> str:
+    def execute_complete(self, context: Context, event: dict[str, str]) -> bool:
         """
         Act as a callback for when the trigger fires - returns immediately.
 
@@ -140,7 +142,7 @@
                 raise AirflowSkipException(event["message"])
             raise AirflowException(event["message"])
         self.log.info("File %s was found in bucket %s.", self.object, self.bucket)
-        return event["message"]
+        return True
 
 
 @deprecated(
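
With the GCSObjectExistenceSensor change above, execute() now returns the boolean stored in self._matches in both the deferrable and non-deferrable paths, so the result is available over XCom. A hedged usage sketch under assumed placeholder names (bucket, object, and DAG id are not from this diff):

from __future__ import annotations

from datetime import datetime

from airflow.decorators import task
from airflow.models.dag import DAG
from airflow.providers.google.cloud.sensors.gcs import GCSObjectExistenceSensor

with DAG("example_gcs_object_exists", start_date=datetime(2024, 1, 1), schedule=None):
    wait_for_report = GCSObjectExistenceSensor(
        task_id="wait_for_report",
        bucket="my-bucket",
        object="reports/2024-01-01/*.csv",
        use_glob=True,
        deferrable=True,
    )

    @task
    def log_result(found: bool) -> None:
        # The XCom value is the boolean returned by the sensor's execute().
        print(f"Object found: {found}")

    # .output references the sensor's return_value XCom.
    log_result(wait_for_report.output)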
airflow/providers/google/cloud/sensors/tasks.py

@@ -22,6 +22,7 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Sequence
 
 from airflow.providers.google.cloud.hooks.tasks import CloudTasksHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
@@ -57,7 +58,7 @@ class TaskQueueEmptySensor(BaseSensorOperator):
         self,
         *,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         queue_name: str | None = None,
         gcp_conn_id: str = "google_cloud_default",
         impersonation_chain: str | Sequence[str] | None = None,
airflow/providers/google/cloud/sensors/workflows.py

@@ -23,6 +23,7 @@ from google.cloud.workflows.executions_v1beta import Execution
 
 from airflow.exceptions import AirflowException, AirflowSkipException
 from airflow.providers.google.cloud.hooks.workflows import WorkflowsHook
+from airflow.providers.google.common.hooks.base_google import PROVIDE_PROJECT_ID
 from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
@@ -58,7 +59,7 @@ class WorkflowExecutionSensor(BaseSensorOperator):
         workflow_id: str,
         execution_id: str,
         location: str,
-        project_id: str | None = None,
+        project_id: str = PROVIDE_PROJECT_ID,
         success_states: set[Execution.State] | None = None,
         failure_states: set[Execution.State] | None = None,
         retry: Retry | _MethodDefault = DEFAULT,
airflow/providers/google/cloud/transfers/adls_to_gcs.py

@@ -24,8 +24,14 @@ from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Sequence
 
 from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url
-from airflow.providers.microsoft.azure.hooks.data_lake import AzureDataLakeHook
-from airflow.providers.microsoft.azure.operators.adls import ADLSListOperator
+
+try:
+    from airflow.providers.microsoft.azure.hooks.data_lake import AzureDataLakeHook
+    from airflow.providers.microsoft.azure.operators.adls import ADLSListOperator
+except ModuleNotFoundError as e:
+    from airflow.exceptions import AirflowOptionalProviderFeatureException
+
+    raise AirflowOptionalProviderFeatureException(e)
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/google/cloud/transfers/azure_blob_to_gcs.py

@@ -22,7 +22,13 @@ from typing import TYPE_CHECKING, Sequence
 
 from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
-from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
+
+try:
+    from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
+except ModuleNotFoundError as e:
+    from airflow.exceptions import AirflowOptionalProviderFeatureException
+
+    raise AirflowOptionalProviderFeatureException(e)
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/google/cloud/transfers/azure_fileshare_to_gcs.py

@@ -24,7 +24,13 @@ from typing import TYPE_CHECKING, Sequence
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook, _parse_gcs_url, gcs_object_is_directory
-from airflow.providers.microsoft.azure.hooks.fileshare import AzureFileShareHook
+
+try:
+    from airflow.providers.microsoft.azure.hooks.fileshare import AzureFileShareHook
+except ModuleNotFoundError as e:
+    from airflow.exceptions import AirflowOptionalProviderFeatureException
+
+    raise AirflowOptionalProviderFeatureException(e)
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context