apache-airflow-providers-amazon 8.26.0rc2__py3-none-any.whl → 8.27.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (30)
  1. airflow/providers/amazon/__init__.py +1 -1
  2. airflow/providers/amazon/aws/datasets/__init__.py +16 -0
  3. airflow/providers/amazon/aws/datasets/s3.py +45 -0
  4. airflow/providers/amazon/aws/executors/batch/batch_executor.py +20 -13
  5. airflow/providers/amazon/aws/executors/ecs/ecs_executor.py +24 -13
  6. airflow/providers/amazon/aws/hooks/kinesis_analytics.py +65 -0
  7. airflow/providers/amazon/aws/hooks/rds.py +3 -3
  8. airflow/providers/amazon/aws/hooks/s3.py +26 -1
  9. airflow/providers/amazon/aws/hooks/step_function.py +18 -0
  10. airflow/providers/amazon/aws/operators/athena.py +16 -17
  11. airflow/providers/amazon/aws/operators/emr.py +23 -23
  12. airflow/providers/amazon/aws/operators/kinesis_analytics.py +348 -0
  13. airflow/providers/amazon/aws/operators/rds.py +17 -20
  14. airflow/providers/amazon/aws/operators/redshift_cluster.py +71 -53
  15. airflow/providers/amazon/aws/operators/s3.py +7 -11
  16. airflow/providers/amazon/aws/operators/sagemaker.py +6 -18
  17. airflow/providers/amazon/aws/operators/step_function.py +12 -2
  18. airflow/providers/amazon/aws/sensors/kinesis_analytics.py +234 -0
  19. airflow/providers/amazon/aws/transfers/redshift_to_s3.py +1 -0
  20. airflow/providers/amazon/aws/transfers/s3_to_redshift.py +1 -0
  21. airflow/providers/amazon/aws/triggers/emr.py +3 -1
  22. airflow/providers/amazon/aws/triggers/kinesis_analytics.py +69 -0
  23. airflow/providers/amazon/aws/triggers/sagemaker.py +9 -1
  24. airflow/providers/amazon/aws/waiters/kinesisanalyticsv2.json +151 -0
  25. airflow/providers/amazon/aws/waiters/rds.json +253 -0
  26. airflow/providers/amazon/get_provider_info.py +35 -2
  27. {apache_airflow_providers_amazon-8.26.0rc2.dist-info → apache_airflow_providers_amazon-8.27.0.dist-info}/METADATA +32 -25
  28. {apache_airflow_providers_amazon-8.26.0rc2.dist-info → apache_airflow_providers_amazon-8.27.0.dist-info}/RECORD +30 -22
  29. {apache_airflow_providers_amazon-8.26.0rc2.dist-info → apache_airflow_providers_amazon-8.27.0.dist-info}/WHEEL +0 -0
  30. {apache_airflow_providers_amazon-8.26.0rc2.dist-info → apache_airflow_providers_amazon-8.27.0.dist-info}/entry_points.txt +0 -0
airflow/providers/amazon/aws/operators/kinesis_analytics.py
@@ -0,0 +1,348 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Sequence
+
+from botocore.exceptions import ClientError
+
+from airflow.configuration import conf
+from airflow.exceptions import AirflowException
+from airflow.providers.amazon.aws.hooks.kinesis_analytics import KinesisAnalyticsV2Hook
+from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
+from airflow.providers.amazon.aws.triggers.kinesis_analytics import (
+    KinesisAnalyticsV2ApplicationOperationCompleteTrigger,
+)
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
+from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
+
+if TYPE_CHECKING:
+    from airflow.utils.context import Context
+
+
+class KinesisAnalyticsV2CreateApplicationOperator(AwsBaseOperator[KinesisAnalyticsV2Hook]):
+    """
+    Creates an AWS Managed Service for Apache Flink application.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:KinesisAnalyticsV2CreateApplicationOperator`
+
+    :param application_name: The name of application. (templated)
+    :param runtime_environment: The runtime environment for the application. (templated)
+    :param service_execution_role: The IAM role used by the application to access services. (templated)
+    :param create_application_kwargs: Create application extra properties. (templated)
+    :param application_description: A summary description of the application. (templated)
+
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
+    """
+
+    aws_hook_class = KinesisAnalyticsV2Hook
+    ui_color = "#44b5e2"
+
+    template_fields: Sequence[str] = aws_template_fields(
+        "application_name",
+        "runtime_environment",
+        "service_execution_role",
+        "create_application_kwargs",
+        "application_description",
+    )
+    template_fields_renderers: dict = {
+        "create_application_kwargs": "json",
+    }
+
+    def __init__(
+        self,
+        application_name: str,
+        runtime_environment: str,
+        service_execution_role: str,
+        create_application_kwargs: dict[str, Any] | None = None,
+        application_description: str = "Managed Service for Apache Flink application created from Airflow",
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.application_name = application_name
+        self.runtime_environment = runtime_environment
+        self.service_execution_role = service_execution_role
+        self.create_application_kwargs = create_application_kwargs or {}
+        self.application_description = application_description
+
+    def execute(self, context: Context) -> dict[str, str]:
+        self.log.info("Creating AWS Managed Service for Apache Flink application %s.", self.application_name)
+        try:
+            response = self.hook.conn.create_application(
+                ApplicationName=self.application_name,
+                ApplicationDescription=self.application_description,
+                RuntimeEnvironment=self.runtime_environment,
+                ServiceExecutionRole=self.service_execution_role,
+                **self.create_application_kwargs,
+            )
+        except ClientError as error:
+            raise AirflowException(
+                f"AWS Managed Service for Apache Flink application creation failed: {error.response['Error']['Message']}"
+            )
+
+        self.log.info(
+            "AWS Managed Service for Apache Flink application created successfully %s.",
+            self.application_name,
+        )
+
+        return {"ApplicationARN": response["ApplicationDetail"]["ApplicationARN"]}
+
+
+class KinesisAnalyticsV2StartApplicationOperator(AwsBaseOperator[KinesisAnalyticsV2Hook]):
+    """
+    Starts an AWS Managed Service for Apache Flink application.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:KinesisAnalyticsV2StartApplicationOperator`
+
+    :param application_name: The name of application. (templated)
+    :param run_configuration: Application properties to start Apache Flink Job. (templated)
+
+    :param wait_for_completion: Whether to wait for job to stop. (default: True)
+    :param waiter_delay: Time in seconds to wait between status checks. (default: 60)
+    :param waiter_max_attempts: Maximum number of attempts to check for job completion. (default: 20)
+    :param deferrable: If True, the operator will wait asynchronously for the job to stop.
+        This implies waiting for completion. This mode requires aiobotocore module to be installed.
+        (default: False)
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
+    """
+
+    aws_hook_class = KinesisAnalyticsV2Hook
+    ui_color = "#44b5e2"
+
+    template_fields: Sequence[str] = aws_template_fields(
+        "application_name",
+        "run_configuration",
+    )
+    template_fields_renderers: dict = {
+        "run_configuration": "json",
+    }
+
+    def __init__(
+        self,
+        application_name: str,
+        run_configuration: dict[str, Any] | None = None,
+        wait_for_completion: bool = True,
+        waiter_delay: int = 60,
+        waiter_max_attempts: int = 20,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.application_name = application_name
+        self.run_configuration = run_configuration or {}
+        self.wait_for_completion = wait_for_completion
+        self.waiter_delay = waiter_delay
+        self.waiter_max_attempts = waiter_max_attempts
+        self.deferrable = deferrable
+
+    def execute(self, context: Context) -> dict[str, Any]:
+        msg = "AWS Managed Service for Apache Flink application"
+
+        try:
+            self.log.info("Starting %s %s.", msg, self.application_name)
+            self.hook.conn.start_application(
+                ApplicationName=self.application_name, RunConfiguration=self.run_configuration
+            )
+        except ClientError as error:
+            raise AirflowException(
+                f"Failed to start {msg} {self.application_name}: {error.response['Error']['Message']}"
+            )
+
+        describe_response = self.hook.conn.describe_application(ApplicationName=self.application_name)
+
+        if self.deferrable:
+            self.log.info("Deferring for %s to start: %s.", msg, self.application_name)
+            self.defer(
+                trigger=KinesisAnalyticsV2ApplicationOperationCompleteTrigger(
+                    application_name=self.application_name,
+                    waiter_name="application_start_complete",
+                    aws_conn_id=self.aws_conn_id,
+                    waiter_delay=self.waiter_delay,
+                    waiter_max_attempts=self.waiter_max_attempts,
+                    region_name=self.region_name,
+                    verify=self.verify,
+                    botocore_config=self.botocore_config,
+                ),
+                method_name="execute_complete",
+            )
+        if self.wait_for_completion:
+            self.log.info("Waiting for %s to start: %s.", msg, self.application_name)
+
+            self.hook.get_waiter("application_start_complete").wait(
+                ApplicationName=self.application_name,
+                WaiterConfig={"Delay": self.waiter_delay, "MaxAttempts": self.waiter_max_attempts},
+            )
+
+            self.log.info("%s started successfully %s.", msg, self.application_name)
+
+        return {"ApplicationARN": describe_response["ApplicationDetail"]["ApplicationARN"]}
+
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> dict[str, Any]:
+        event = validate_execute_complete_event(event)
+
+        if event["status"] != "success":
+            raise AirflowException(
+                "Error while starting AWS Managed Service for Apache Flink application: %s", event
+            )
+
+        response = self.hook.conn.describe_application(
+            ApplicationName=event["application_name"],
+        )
+
+        self.log.info(
+            "AWS Managed Service for Apache Flink application %s started successfully.",
+            event["application_name"],
+        )
+
+        return {"ApplicationARN": response["ApplicationDetail"]["ApplicationARN"]}
+
+
+class KinesisAnalyticsV2StopApplicationOperator(AwsBaseOperator[KinesisAnalyticsV2Hook]):
+    """
+    Stop an AWS Managed Service for Apache Flink application.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:KinesisAnalyticsV2StopApplicationOperator`
+
+    :param application_name: The name of your application. (templated)
+    :param force: Set to true to force the application to stop. If you set Force to true, Managed Service for
+        Apache Flink stops the application without taking a snapshot. (templated)
+
+    :param wait_for_completion: Whether to wait for job to stop. (default: True)
+    :param waiter_delay: Time in seconds to wait between status checks. (default: 60)
+    :param waiter_max_attempts: Maximum number of attempts to check for job completion. (default: 20)
+    :param deferrable: If True, the operator will wait asynchronously for the job to stop.
+        This implies waiting for completion. This mode requires aiobotocore module to be installed.
+        (default: False)
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
+    """
+
+    aws_hook_class = KinesisAnalyticsV2Hook
+    ui_color = "#44b5e2"
+
+    template_fields: Sequence[str] = aws_template_fields(
+        "application_name",
+        "force",
+    )
+
+    def __init__(
+        self,
+        application_name: str,
+        force: bool = False,
+        wait_for_completion: bool = True,
+        waiter_delay: int = 60,
+        waiter_max_attempts: int = 20,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.application_name = application_name
+        self.force = force
+        self.wait_for_completion = wait_for_completion
+        self.waiter_delay = waiter_delay
+        self.waiter_max_attempts = waiter_max_attempts
+        self.deferrable = deferrable
+
+    def execute(self, context: Context) -> dict[str, Any]:
+        msg = "AWS Managed Service for Apache Flink application"
+
+        try:
+            self.log.info("Stopping %s %s.", msg, self.application_name)
+
+            self.hook.conn.stop_application(ApplicationName=self.application_name, Force=self.force)
+        except ClientError as error:
+            raise AirflowException(
+                f"Failed to stop {msg} {self.application_name}: {error.response['Error']['Message']}"
+            )
+
+        describe_response = self.hook.conn.describe_application(ApplicationName=self.application_name)
+
+        if self.deferrable:
+            self.log.info("Deferring for %s to stop: %s.", msg, self.application_name)
+            self.defer(
+                trigger=KinesisAnalyticsV2ApplicationOperationCompleteTrigger(
+                    application_name=self.application_name,
+                    waiter_name="application_stop_complete",
+                    aws_conn_id=self.aws_conn_id,
+                    waiter_delay=self.waiter_delay,
+                    waiter_max_attempts=self.waiter_max_attempts,
+                    region_name=self.region_name,
+                    verify=self.verify,
+                    botocore_config=self.botocore_config,
+                ),
+                method_name="execute_complete",
+            )
+        if self.wait_for_completion:
+            self.log.info("Waiting for %s to stop: %s.", msg, self.application_name)
+
+            self.hook.get_waiter("application_stop_complete").wait(
+                ApplicationName=self.application_name,
+                WaiterConfig={"Delay": self.waiter_delay, "MaxAttempts": self.waiter_max_attempts},
+            )
+
+            self.log.info("%s stopped successfully %s.", msg, self.application_name)
+
+        return {"ApplicationARN": describe_response["ApplicationDetail"]["ApplicationARN"]}
+
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> dict[str, Any]:
+        event = validate_execute_complete_event(event)
+
+        if event["status"] != "success":
+            raise AirflowException("Error while stopping AWS Managed Service for Apache Flink application")
+
+        response = self.hook.conn.describe_application(
+            ApplicationName=event["application_name"],
+        )
+
+        self.log.info(
+            "AWS Managed Service for Apache Flink application %s stopped successfully.",
+            event["application_name"],
+        )
+
+        return {"ApplicationARN": response["ApplicationDetail"]["ApplicationARN"]}
airflow/providers/amazon/aws/operators/rds.py
@@ -36,6 +36,7 @@ from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.rds import RdsDbType
 from airflow.providers.amazon.aws.utils.tags import format_tags
 from airflow.providers.amazon.aws.utils.waiter_with_logging import wait
+from airflow.utils.helpers import prune_dict

 if TYPE_CHECKING:
     from mypy_boto3_rds.type_defs import TagTypeDef
@@ -782,7 +783,7 @@ class RdsStartDbOperator(RdsBaseOperator):
                     aws_conn_id=self.aws_conn_id,
                     region_name=self.region_name,
                     response=start_db_response,
-                    db_type=RdsDbType.INSTANCE,
+                    db_type=self.db_type,
                 ),
                 method_name="execute_complete",
             )
@@ -881,12 +882,25 @@ class RdsStopDbOperator(RdsBaseOperator):
                     aws_conn_id=self.aws_conn_id,
                     region_name=self.region_name,
                     response=stop_db_response,
-                    db_type=RdsDbType.INSTANCE,
+                    db_type=self.db_type,
                 ),
                 method_name="execute_complete",
             )
         elif self.wait_for_completion:
-            self._wait_until_db_stopped()
+            waiter = self.hook.get_waiter(f"db_{self.db_type.value}_stopped")
+            waiter_key = (
+                "DBInstanceIdentifier" if self.db_type == RdsDbType.INSTANCE else "DBClusterIdentifier"
+            )
+            kwargs = {waiter_key: self.db_identifier}
+            waiter.wait(
+                WaiterConfig=prune_dict(
+                    {
+                        "Delay": self.waiter_delay,
+                        "MaxAttempts": self.waiter_max_attempts,
+                    }
+                ),
+                **kwargs,
+            )
         return json.dumps(stop_db_response, default=str)

     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
@@ -915,23 +929,6 @@ class RdsStopDbOperator(RdsBaseOperator):
             response = self.hook.conn.stop_db_cluster(DBClusterIdentifier=self.db_identifier)
         return response

-    def _wait_until_db_stopped(self):
-        self.log.info("Waiting for DB %s to reach 'stopped' state", self.db_type.value)
-        if self.db_type == RdsDbType.INSTANCE:
-            self.hook.wait_for_db_instance_state(
-                self.db_identifier,
-                target_state="stopped",
-                check_interval=self.waiter_delay,
-                max_attempts=self.waiter_max_attempts,
-            )
-        else:
-            self.hook.wait_for_db_cluster_state(
-                self.db_identifier,
-                target_state="stopped",
-                check_interval=self.waiter_delay,
-                max_attempts=self.waiter_max_attempts,
-            )
-

 __all__ = [
     "RdsCreateDbSnapshotOperator",
airflow/providers/amazon/aws/operators/redshift_cluster.py
@@ -32,6 +32,7 @@ from airflow.providers.amazon.aws.triggers.redshift_cluster import (
     RedshiftResumeClusterTrigger,
 )
 from airflow.providers.amazon.aws.utils import validate_execute_complete_event
+from airflow.utils.helpers import prune_dict

 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -256,8 +257,6 @@ class RedshiftCreateClusterOperator(BaseOperator):
             params["ClusterVersion"] = self.cluster_version
         if self.allow_version_upgrade:
             params["AllowVersionUpgrade"] = self.allow_version_upgrade
-        if self.publicly_accessible:
-            params["PubliclyAccessible"] = self.publicly_accessible
         if self.encrypted:
             params["Encrypted"] = self.encrypted
         if self.hsm_client_certificate_identifier:
@@ -287,6 +286,10 @@ class RedshiftCreateClusterOperator(BaseOperator):
         if self.default_iam_role_arn:
             params["DefaultIamRoleArn"] = self.default_iam_role_arn

+        # PubliclyAccessible is True by default on Redshift side, hence, we should always set it regardless
+        # of its value
+        params["PubliclyAccessible"] = self.publicly_accessible
+
         cluster = redshift_hook.create_cluster(
             self.cluster_identifier,
             self.node_type,
@@ -505,8 +508,8 @@ class RedshiftResumeClusterOperator(BaseOperator):
         aws_conn_id: str | None = "aws_default",
         wait_for_completion: bool = False,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
-        poll_interval: int = 10,
-        max_attempts: int = 10,
+        poll_interval: int = 30,
+        max_attempts: int = 30,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -540,38 +543,38 @@
                 else:
                     raise error

-        if self.deferrable:
-            cluster_state = redshift_hook.cluster_status(cluster_identifier=self.cluster_identifier)
-            if cluster_state == "available":
-                self.log.info("Resumed cluster successfully")
-            elif cluster_state == "deleting":
-                raise AirflowException(
-                    "Unable to resume cluster since cluster is currently in status: %s", cluster_state
-                )
+        if self.wait_for_completion:
+            if self.deferrable:
+                cluster_state = redshift_hook.cluster_status(cluster_identifier=self.cluster_identifier)
+                if cluster_state == "available":
+                    self.log.info("Resumed cluster successfully")
+                elif cluster_state == "deleting":
+                    raise AirflowException(
+                        "Unable to resume cluster since cluster is currently in status: %s", cluster_state
+                    )
+                else:
+                    self.defer(
+                        trigger=RedshiftResumeClusterTrigger(
+                            cluster_identifier=self.cluster_identifier,
+                            waiter_delay=self.poll_interval,
+                            waiter_max_attempts=self.max_attempts,
+                            aws_conn_id=self.aws_conn_id,
+                        ),
+                        method_name="execute_complete",
+                        # timeout is set to ensure that if a trigger dies, the timeout does not restart
+                        # 60 seconds is added to allow the trigger to exit gracefully (i.e. yield TriggerEvent)
+                        timeout=timedelta(seconds=self.max_attempts * self.poll_interval + 60),
+                    )
             else:
-                self.defer(
-                    trigger=RedshiftResumeClusterTrigger(
-                        cluster_identifier=self.cluster_identifier,
-                        waiter_delay=self.poll_interval,
-                        waiter_max_attempts=self.max_attempts,
-                        aws_conn_id=self.aws_conn_id,
-                    ),
-                    method_name="execute_complete",
-                    # timeout is set to ensure that if a trigger dies, the timeout does not restart
-                    # 60 seconds is added to allow the trigger to exit gracefully (i.e. yield TriggerEvent)
-                    timeout=timedelta(seconds=self.max_attempts * self.poll_interval + 60),
+                waiter = redshift_hook.get_waiter("cluster_resumed")
+                waiter.wait(
+                    ClusterIdentifier=self.cluster_identifier,
+                    WaiterConfig={
+                        "Delay": self.poll_interval,
+                        "MaxAttempts": self.max_attempts,
+                    },
                 )

-        if self.wait_for_completion:
-            waiter = redshift_hook.get_waiter("cluster_resumed")
-            waiter.wait(
-                ClusterIdentifier=self.cluster_identifier,
-                WaiterConfig={
-                    "Delay": self.poll_interval,
-                    "MaxAttempts": self.max_attempts,
-                },
-            )
-
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
         event = validate_execute_complete_event(event)

@@ -594,6 +597,7 @@ class RedshiftPauseClusterOperator(BaseOperator):
         running Airflow in a distributed manner and aws_conn_id is None or
         empty, then default boto3 configuration would be used (and must be
         maintained on each worker node).
+    :param wait_for_completion: If True, waits for the cluster to be paused. (default: False)
     :param deferrable: Run operator in the deferrable mode
     :param poll_interval: Time (in seconds) to wait between two consecutive calls to check cluster state
     :param max_attempts: Maximum number of attempts to poll the cluster
@@ -608,14 +612,16 @@
         *,
         cluster_identifier: str,
         aws_conn_id: str | None = "aws_default",
+        wait_for_completion: bool = False,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
-        poll_interval: int = 10,
-        max_attempts: int = 15,
+        poll_interval: int = 30,
+        max_attempts: int = 30,
         **kwargs,
     ):
         super().__init__(**kwargs)
         self.cluster_identifier = cluster_identifier
         self.aws_conn_id = aws_conn_id
+        self.wait_for_completion = wait_for_completion
         self.deferrable = deferrable
         self.max_attempts = max_attempts
         self.poll_interval = poll_interval
@@ -641,26 +647,38 @@
                     time.sleep(self._attempt_interval)
                 else:
                     raise error
-        if self.deferrable:
-            cluster_state = redshift_hook.cluster_status(cluster_identifier=self.cluster_identifier)
-            if cluster_state == "paused":
-                self.log.info("Paused cluster successfully")
-            elif cluster_state == "deleting":
-                raise AirflowException(
-                    f"Unable to pause cluster since cluster is currently in status: {cluster_state}"
-                )
+        if self.wait_for_completion:
+            if self.deferrable:
+                cluster_state = redshift_hook.cluster_status(cluster_identifier=self.cluster_identifier)
+                if cluster_state == "paused":
+                    self.log.info("Paused cluster successfully")
+                elif cluster_state == "deleting":
+                    raise AirflowException(
+                        f"Unable to pause cluster since cluster is currently in status: {cluster_state}"
+                    )
+                else:
+                    self.defer(
+                        trigger=RedshiftPauseClusterTrigger(
+                            cluster_identifier=self.cluster_identifier,
+                            waiter_delay=self.poll_interval,
+                            waiter_max_attempts=self.max_attempts,
+                            aws_conn_id=self.aws_conn_id,
+                        ),
+                        method_name="execute_complete",
+                        # timeout is set to ensure that if a trigger dies, the timeout does not restart
+                        # 60 seconds is added to allow the trigger to exit gracefully (i.e. yield TriggerEvent)
+                        timeout=timedelta(seconds=self.max_attempts * self.poll_interval + 60),
+                    )
             else:
-                self.defer(
-                    trigger=RedshiftPauseClusterTrigger(
-                        cluster_identifier=self.cluster_identifier,
-                        waiter_delay=self.poll_interval,
-                        waiter_max_attempts=self.max_attempts,
-                        aws_conn_id=self.aws_conn_id,
+                waiter = redshift_hook.get_waiter("cluster_paused")
+                waiter.wait(
+                    ClusterIdentifier=self.cluster_identifier,
+                    WaiterConfig=prune_dict(
+                        {
+                            "Delay": self.poll_interval,
+                            "MaxAttempts": self.max_attempts,
+                        }
                     ),
-                    method_name="execute_complete",
-                    # timeout is set to ensure that if a trigger dies, the timeout does not restart
-                    # 60 seconds is added to allow the trigger to exit gracefully (i.e. yield TriggerEvent)
-                    timeout=timedelta(seconds=self.max_attempts * self.poll_interval + 60),
                 )

     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
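With these changes, RedshiftResumeClusterOperator and RedshiftPauseClusterOperator only poll or defer when wait_for_completion is True, and the polling defaults move to 30 seconds between checks and 30 attempts. A hedged sketch of both operators with the new flag follows; the cluster identifier is a placeholder and the values simply restate the new defaults.

# Hypothetical usage; the cluster identifier is a placeholder.
from airflow.providers.amazon.aws.operators.redshift_cluster import (
    RedshiftPauseClusterOperator,
    RedshiftResumeClusterOperator,
)

pause_cluster = RedshiftPauseClusterOperator(
    task_id="pause_cluster",
    cluster_identifier="my-redshift-cluster",  # placeholder
    wait_for_completion=True,  # new in this release; without it the operator returns right after the API call
    poll_interval=30,
    max_attempts=30,
)

resume_cluster = RedshiftResumeClusterOperator(
    task_id="resume_cluster",
    cluster_identifier="my-redshift-cluster",  # placeholder
    wait_for_completion=True,
    deferrable=True,  # defers via the cluster trigger instead of blocking a worker slot
)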
airflow/providers/amazon/aws/operators/s3.py
@@ -324,8 +324,7 @@ class S3CopyObjectOperator(BaseOperator):
         )

     def get_openlineage_facets_on_start(self):
-        from openlineage.client.run import Dataset
-
+        from airflow.providers.common.compat.openlineage.facet import Dataset
         from airflow.providers.openlineage.extractors import OperatorLineage

         dest_bucket_name, dest_bucket_key = S3Hook.get_s3_bucket_key(
@@ -439,8 +438,7 @@ class S3CreateObjectOperator(BaseOperator):
             s3_hook.load_bytes(self.data, s3_key, s3_bucket, self.replace, self.encrypt, self.acl_policy)

     def get_openlineage_facets_on_start(self):
-        from openlineage.client.run import Dataset
-
+        from airflow.providers.common.compat.openlineage.facet import Dataset
         from airflow.providers.openlineage.extractors import OperatorLineage

         bucket, key = S3Hook.get_s3_bucket_key(self.s3_bucket, self.s3_key, "dest_bucket", "dest_key")
@@ -546,13 +544,12 @@ class S3DeleteObjectsOperator(BaseOperator):

     def get_openlineage_facets_on_complete(self, task_instance):
         """Implement _on_complete because object keys are resolved in execute()."""
-        from openlineage.client.facet import (
+        from airflow.providers.common.compat.openlineage.facet import (
+            Dataset,
             LifecycleStateChange,
             LifecycleStateChangeDatasetFacet,
-            LifecycleStateChangeDatasetFacetPreviousIdentifier,
+            PreviousIdentifier,
         )
-        from openlineage.client.run import Dataset
-
         from airflow.providers.openlineage.extractors import OperatorLineage

         if not self._keys:
@@ -570,7 +567,7 @@ class S3DeleteObjectsOperator(BaseOperator):
                 facets={
                     "lifecycleStateChange": LifecycleStateChangeDatasetFacet(
                         lifecycleStateChange=LifecycleStateChange.DROP.value,
-                        previousIdentifier=LifecycleStateChangeDatasetFacetPreviousIdentifier(
+                        previousIdentifier=PreviousIdentifier(
                             namespace=bucket_url,
                             name=key,
                         ),
@@ -725,8 +722,7 @@ class S3FileTransformOperator(BaseOperator):
             self.log.info("Upload successful")

     def get_openlineage_facets_on_start(self):
-        from openlineage.client.run import Dataset
-
+        from airflow.providers.common.compat.openlineage.facet import Dataset
         from airflow.providers.openlineage.extractors import OperatorLineage

         dest_bucket_name, dest_bucket_key = S3Hook.get_s3_bucket_key(
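The s3.py hunks above only swap the OpenLineage lineage classes from the openlineage.client package to the airflow.providers.common.compat.openlineage.facet shim. A minimal sketch of the same pattern in a hypothetical custom operator follows; MyS3Operator and the bucket and key names are placeholders, not part of the release.

# Sketch of the new import pattern only; MyS3Operator is a hypothetical custom operator.
from airflow.models import BaseOperator


class MyS3Operator(BaseOperator):  # placeholder operator
    def get_openlineage_facets_on_start(self):
        # Lineage classes now come from the common.compat shim rather than openlineage.client.
        from airflow.providers.common.compat.openlineage.facet import Dataset
        from airflow.providers.openlineage.extractors import OperatorLineage

        return OperatorLineage(
            inputs=[Dataset(namespace="s3://my-source-bucket", name="input/key")],  # placeholders
            outputs=[Dataset(namespace="s3://my-dest-bucket", name="output/key")],
        )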