apache-airflow-providers-amazon 9.8.0rc1__py3-none-any.whl → 9.9.0rc1__py3-none-any.whl
This diff covers the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- airflow/providers/amazon/__init__.py +1 -1
- airflow/providers/amazon/aws/executors/aws_lambda/__init__.py +21 -0
- airflow/providers/amazon/aws/executors/aws_lambda/docker/Dockerfile +107 -0
- airflow/providers/amazon/aws/executors/aws_lambda/docker/__init__.py +16 -0
- airflow/providers/amazon/aws/executors/aws_lambda/docker/app.py +129 -0
- airflow/providers/amazon/aws/executors/aws_lambda/lambda_executor.py +479 -0
- airflow/providers/amazon/aws/executors/aws_lambda/utils.py +70 -0
- airflow/providers/amazon/aws/executors/ecs/ecs_executor.py +1 -1
- airflow/providers/amazon/aws/executors/ecs/ecs_executor_config.py +4 -8
- airflow/providers/amazon/aws/hooks/base_aws.py +20 -4
- airflow/providers/amazon/aws/hooks/eks.py +14 -5
- airflow/providers/amazon/aws/hooks/s3.py +101 -34
- airflow/providers/amazon/aws/hooks/sns.py +10 -1
- airflow/providers/amazon/aws/log/cloudwatch_task_handler.py +12 -5
- airflow/providers/amazon/aws/operators/batch.py +1 -2
- airflow/providers/amazon/aws/operators/cloud_formation.py +0 -2
- airflow/providers/amazon/aws/operators/comprehend.py +0 -2
- airflow/providers/amazon/aws/operators/dms.py +0 -2
- airflow/providers/amazon/aws/operators/ecs.py +1 -1
- airflow/providers/amazon/aws/operators/eks.py +13 -0
- airflow/providers/amazon/aws/operators/emr.py +4 -4
- airflow/providers/amazon/aws/operators/glue.py +0 -6
- airflow/providers/amazon/aws/operators/rds.py +0 -4
- airflow/providers/amazon/aws/operators/redshift_cluster.py +90 -63
- airflow/providers/amazon/aws/operators/sns.py +15 -1
- airflow/providers/amazon/aws/sensors/redshift_cluster.py +13 -10
- airflow/providers/amazon/get_provider_info.py +68 -0
- {apache_airflow_providers_amazon-9.8.0rc1.dist-info → apache_airflow_providers_amazon-9.9.0rc1.dist-info}/METADATA +15 -19
- {apache_airflow_providers_amazon-9.8.0rc1.dist-info → apache_airflow_providers_amazon-9.9.0rc1.dist-info}/RECORD +31 -25
- {apache_airflow_providers_amazon-9.8.0rc1.dist-info → apache_airflow_providers_amazon-9.9.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_amazon-9.8.0rc1.dist-info → apache_airflow_providers_amazon-9.9.0rc1.dist-info}/entry_points.txt +0 -0
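The dominant change across the Redshift hunks below is a migration from BaseOperator/BaseSensorOperator to the provider's generic AWS base classes: each class now declares `aws_hook_class = RedshiftHook`, builds its `template_fields` with `aws_template_fields()`, and uses the lazily created `self.hook` instead of constructing a `RedshiftHook` by hand; this is also what adds the `region_name` and `verify` parameters to every docstring. A minimal sketch of that pattern, assuming only what the hunks show (the operator name `ClusterStatusOperator` is hypothetical, not part of the package):

from __future__ import annotations

from collections.abc import Sequence

from airflow.providers.amazon.aws.hooks.redshift_cluster import RedshiftHook
from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
from airflow.providers.amazon.aws.utils.mixins import aws_template_fields


class ClusterStatusOperator(AwsBaseOperator[RedshiftHook]):
    """Hypothetical operator: log and return the status of a Redshift cluster."""

    # The base class builds this hook from aws_conn_id / region_name / verify.
    aws_hook_class = RedshiftHook
    # aws_template_fields() appends the shared AWS connection fields to our own.
    template_fields: Sequence[str] = aws_template_fields("cluster_identifier")

    def __init__(self, *, cluster_identifier: str, **kwargs):
        super().__init__(**kwargs)
        self.cluster_identifier = cluster_identifier

    def execute(self, context):
        # self.hook replaces the old hand-built RedshiftHook(aws_conn_id=...).
        status = self.hook.cluster_status(cluster_identifier=self.cluster_identifier)
        self.log.info("Cluster %s is in state %s", self.cluster_identifier, status)
        return status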
airflow/providers/amazon/aws/operators/redshift_cluster.py

@@ -23,8 +23,8 @@ from typing import TYPE_CHECKING, Any

 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.redshift_cluster import RedshiftHook
+from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
 from airflow.providers.amazon.aws.triggers.redshift_cluster import (
     RedshiftCreateClusterSnapshotTrigger,
     RedshiftCreateClusterTrigger,
@@ -33,13 +33,14 @@ from airflow.providers.amazon.aws.triggers.redshift_cluster import (
     RedshiftResumeClusterTrigger,
 )
 from airflow.providers.amazon.aws.utils import validate_execute_complete_event
+from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
 from airflow.utils.helpers import prune_dict

 if TYPE_CHECKING:
     from airflow.utils.context import Context


-class RedshiftCreateClusterOperator(BaseOperator):
+class RedshiftCreateClusterOperator(AwsBaseOperator[RedshiftHook]):
     """
     Creates a new cluster with the specified parameters.

@@ -95,15 +96,21 @@ class RedshiftCreateClusterOperator(BaseOperator):
         between Availability Zones after the cluster is created.
     :param aqua_configuration_status: The cluster is configured to use AQUA .
     :param default_iam_role_arn: ARN for the IAM role.
-    :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used.
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
     :param wait_for_completion: Whether wait for the cluster to be in ``available`` state
     :param max_attempt: The maximum number of attempts to be made. Default: 5
     :param poll_interval: The amount of time in seconds to wait between attempts. Default: 60
     :param deferrable: If True, the operator will run in deferrable mode
     """

-    template_fields: Sequence[str] = (
+    template_fields: Sequence[str] = aws_template_fields(
         "cluster_identifier",
         "cluster_type",
         "node_type",
@@ -141,6 +148,7 @@ class RedshiftCreateClusterOperator(BaseOperator):
     )
     ui_color = "#eeaa11"
     ui_fgcolor = "#ffffff"
+    aws_hook_class = RedshiftHook

     def __init__(
         self,
@@ -178,7 +186,6 @@ class RedshiftCreateClusterOperator(BaseOperator):
         availability_zone_relocation: bool | None = None,
         aqua_configuration_status: str | None = None,
         default_iam_role_arn: str | None = None,
-        aws_conn_id: str | None = "aws_default",
         wait_for_completion: bool = False,
         max_attempt: int = 5,
         poll_interval: int = 60,
@@ -219,7 +226,6 @@ class RedshiftCreateClusterOperator(BaseOperator):
         self.availability_zone_relocation = availability_zone_relocation
         self.aqua_configuration_status = aqua_configuration_status
         self.default_iam_role_arn = default_iam_role_arn
-        self.aws_conn_id = aws_conn_id
         self.wait_for_completion = wait_for_completion
         self.max_attempt = max_attempt
         self.poll_interval = poll_interval
@@ -227,7 +233,6 @@ class RedshiftCreateClusterOperator(BaseOperator):
         self.kwargs = kwargs

     def execute(self, context: Context):
-        redshift_hook = RedshiftHook(aws_conn_id=self.aws_conn_id)
         self.log.info("Creating Redshift cluster %s", self.cluster_identifier)
         params: dict[str, Any] = {}
         if self.db_name:
@@ -291,7 +296,7 @@ class RedshiftCreateClusterOperator(BaseOperator):
         # of its value
         params["PubliclyAccessible"] = self.publicly_accessible

-        cluster = redshift_hook.create_cluster(
+        cluster = self.hook.create_cluster(
             self.cluster_identifier,
             self.node_type,
             self.master_username,
@@ -309,7 +314,7 @@ class RedshiftCreateClusterOperator(BaseOperator):
                 method_name="execute_complete",
             )
         if self.wait_for_completion:
-            redshift_hook.get_conn().get_waiter("cluster_available").wait(
+            self.hook.get_conn().get_waiter("cluster_available").wait(
                 ClusterIdentifier=self.cluster_identifier,
                 WaiterConfig={
                     "Delay": self.poll_interval,
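With this migration, RedshiftCreateClusterOperator no longer accepts its own `aws_conn_id` argument in `__init__`; the connection, region, and TLS options are inherited from `AwsBaseOperator` as documented above. An illustrative task definition under that assumption (all identifiers and values are placeholders):

from airflow.providers.amazon.aws.operators.redshift_cluster import RedshiftCreateClusterOperator

create_cluster = RedshiftCreateClusterOperator(
    task_id="create_redshift_cluster",
    cluster_identifier="example-cluster",  # placeholder
    node_type="ra3.xlplus",
    master_username="admin",
    master_user_password="{{ var.value.redshift_password }}",  # resolved at render time
    region_name="us-east-1",  # now handled by the AwsBaseOperator base
    wait_for_completion=True,
)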
@@ -327,7 +332,7 @@ class RedshiftCreateClusterOperator(BaseOperator):
             raise AirflowException(f"Error creating cluster: {validated_event}")


-class RedshiftCreateClusterSnapshotOperator(BaseOperator):
+class RedshiftCreateClusterSnapshotOperator(AwsBaseOperator[RedshiftHook]):
     """
     Creates a manual snapshot of the specified cluster. The cluster must be in the available state.

@@ -344,15 +349,23 @@ class RedshiftCreateClusterSnapshotOperator(BaseOperator):
     :param poll_interval: Time (in seconds) to wait between two consecutive calls to check state
     :param max_attempt: The maximum number of attempts to be made to check the state
     :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
     :param deferrable: If True, the operator will run as a deferrable operator.
     """

-    template_fields: Sequence[str] = (
+    template_fields: Sequence[str] = aws_template_fields(
         "cluster_identifier",
         "snapshot_identifier",
     )

+    aws_hook_class = RedshiftHook
+
     def __init__(
         self,
         *,
@@ -363,7 +376,6 @@ class RedshiftCreateClusterSnapshotOperator(BaseOperator):
         wait_for_completion: bool = False,
         poll_interval: int = 15,
         max_attempt: int = 20,
-        aws_conn_id: str | None = "aws_default",
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ):
@@ -376,18 +388,16 @@ class RedshiftCreateClusterSnapshotOperator(BaseOperator):
         self.poll_interval = poll_interval
         self.max_attempt = max_attempt
         self.deferrable = deferrable
-        self.aws_conn_id = aws_conn_id
-        self.redshift_hook = RedshiftHook(aws_conn_id=aws_conn_id)

     def execute(self, context: Context) -> Any:
-        cluster_state = self.redshift_hook.cluster_status(cluster_identifier=self.cluster_identifier)
+        cluster_state = self.hook.cluster_status(cluster_identifier=self.cluster_identifier)
         if cluster_state != "available":
             raise AirflowException(
                 "Redshift cluster must be in available state. "
                 f"Redshift cluster current state is {cluster_state}"
             )

-        self.redshift_hook.create_cluster_snapshot(
+        self.hook.create_cluster_snapshot(
             cluster_identifier=self.cluster_identifier,
             snapshot_identifier=self.snapshot_identifier,
             retention_period=self.retention_period,
@@ -408,7 +418,7 @@ class RedshiftCreateClusterSnapshotOperator(BaseOperator):
         )

         if self.wait_for_completion:
-            self.redshift_hook.conn.get_waiter("snapshot_available").wait(
+            self.hook.conn.get_waiter("snapshot_available").wait(
                 ClusterIdentifier=self.cluster_identifier,
                 WaiterConfig={
                     "Delay": self.poll_interval,
@@ -425,7 +435,7 @@ class RedshiftCreateClusterSnapshotOperator(BaseOperator):
         self.log.info("Cluster snapshot created.")


-class RedshiftDeleteClusterSnapshotOperator(BaseOperator):
+class RedshiftDeleteClusterSnapshotOperator(AwsBaseOperator[RedshiftHook]):
     """
     Deletes the specified manual snapshot.

@@ -438,11 +448,18 @@ class RedshiftDeleteClusterSnapshotOperator(BaseOperator):
     :param wait_for_completion: Whether wait for cluster deletion or not
         The default value is ``True``
     :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
     :param poll_interval: Time (in seconds) to wait between two consecutive calls to check snapshot state
     """

-    template_fields: Sequence[str] = (
+    aws_hook_class = RedshiftHook
+    template_fields: Sequence[str] = aws_template_fields(
         "cluster_identifier",
         "snapshot_identifier",
     )
@@ -453,7 +470,6 @@ class RedshiftDeleteClusterSnapshotOperator(BaseOperator):
         snapshot_identifier: str,
         cluster_identifier: str,
         wait_for_completion: bool = True,
-        aws_conn_id: str | None = "aws_default",
         poll_interval: int = 10,
         **kwargs,
     ):
@@ -462,10 +478,9 @@ class RedshiftDeleteClusterSnapshotOperator(BaseOperator):
         self.cluster_identifier = cluster_identifier
         self.wait_for_completion = wait_for_completion
         self.poll_interval = poll_interval
-        self.redshift_hook = RedshiftHook(aws_conn_id=aws_conn_id)

     def execute(self, context: Context) -> Any:
-        self.redshift_hook.conn.delete_cluster_snapshot(
+        self.hook.conn.delete_cluster_snapshot(
             SnapshotClusterIdentifier=self.cluster_identifier,
             SnapshotIdentifier=self.snapshot_identifier,
         )
@@ -475,12 +490,12 @@ class RedshiftDeleteClusterSnapshotOperator(BaseOperator):
             time.sleep(self.poll_interval)

     def get_status(self) -> str:
-        return self.redshift_hook.get_cluster_snapshot_status(
+        return self.hook.get_cluster_snapshot_status(
             snapshot_identifier=self.snapshot_identifier,
         )


-class RedshiftResumeClusterOperator(BaseOperator):
+class RedshiftResumeClusterOperator(AwsBaseOperator[RedshiftHook]):
     """
     Resume a paused AWS Redshift Cluster.

@@ -490,7 +505,13 @@ class RedshiftResumeClusterOperator(BaseOperator):

     :param cluster_identifier: Unique identifier of the AWS Redshift cluster
     :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
     :param poll_interval: Time (in seconds) to wait between two consecutive calls to check cluster state
     :param max_attempts: The maximum number of attempts to check the state of the cluster.
     :param wait_for_completion: If True, the operator will wait for the cluster to be in the
@@ -498,15 +519,17 @@ class RedshiftResumeClusterOperator(BaseOperator):
     :param deferrable: If True, the operator will run as a deferrable operator.
     """

-    template_fields: Sequence[str] = ("cluster_identifier",)
+    template_fields: Sequence[str] = aws_template_fields(
+        "cluster_identifier",
+    )
     ui_color = "#eeaa11"
     ui_fgcolor = "#ffffff"
+    aws_hook_class = RedshiftHook

     def __init__(
         self,
         *,
         cluster_identifier: str,
-        aws_conn_id: str | None = "aws_default",
         wait_for_completion: bool = False,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: int = 30,
@@ -515,7 +538,6 @@ class RedshiftResumeClusterOperator(BaseOperator):
     ):
         super().__init__(**kwargs)
         self.cluster_identifier = cluster_identifier
-        self.aws_conn_id = aws_conn_id
         self.wait_for_completion = wait_for_completion
         self.deferrable = deferrable
         self.max_attempts = max_attempts
@@ -527,13 +549,12 @@ class RedshiftResumeClusterOperator(BaseOperator):
         self._attempt_interval = 15

     def execute(self, context: Context):
-        redshift_hook = RedshiftHook(aws_conn_id=self.aws_conn_id)
         self.log.info("Starting resume cluster")
         while self._remaining_attempts:
             try:
-                redshift_hook.conn.resume_cluster(ClusterIdentifier=self.cluster_identifier)
+                self.hook.conn.resume_cluster(ClusterIdentifier=self.cluster_identifier)
                 break
-            except redshift_hook.conn.exceptions.InvalidClusterStateFault as error:
+            except self.hook.conn.exceptions.InvalidClusterStateFault as error:
                 self._remaining_attempts -= 1

                 if self._remaining_attempts:
@@ -546,7 +567,7 @@ class RedshiftResumeClusterOperator(BaseOperator):

         if self.wait_for_completion:
             if self.deferrable:
-                cluster_state = redshift_hook.cluster_status(cluster_identifier=self.cluster_identifier)
+                cluster_state = self.hook.cluster_status(cluster_identifier=self.cluster_identifier)
                 if cluster_state == "available":
                     self.log.info("Resumed cluster successfully")
                 elif cluster_state == "deleting":
@@ -567,7 +588,7 @@ class RedshiftResumeClusterOperator(BaseOperator):
                     timeout=timedelta(seconds=self.max_attempts * self.poll_interval + 60),
                 )
             else:
-                waiter = redshift_hook.get_waiter("cluster_resumed")
+                waiter = self.hook.get_waiter("cluster_resumed")
                 waiter.wait(
                     ClusterIdentifier=self.cluster_identifier,
                     WaiterConfig={
@@ -584,7 +605,7 @@ class RedshiftResumeClusterOperator(BaseOperator):
             self.log.info("Resumed cluster successfully")


-class RedshiftPauseClusterOperator(BaseOperator):
+class RedshiftPauseClusterOperator(AwsBaseOperator[RedshiftHook]):
     """
     Pause an AWS Redshift Cluster if it has status `available`.

@@ -594,25 +615,30 @@ class RedshiftPauseClusterOperator(BaseOperator):

     :param cluster_identifier: id of the AWS Redshift Cluster
     :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used. If
-        running Airflow in a distributed manner and aws_conn_id is None or
-        empty, then default boto3 configuration would be used (and must be
-        maintained on each worker node).
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
     :param wait_for_completion: If True, waits for the cluster to be paused. (default: False)
     :param deferrable: Run operator in the deferrable mode
     :param poll_interval: Time (in seconds) to wait between two consecutive calls to check cluster state
     :param max_attempts: Maximum number of attempts to poll the cluster
     """

-    template_fields: Sequence[str] = ("cluster_identifier",)
+    template_fields: Sequence[str] = aws_template_fields(
+        "cluster_identifier",
+    )
     ui_color = "#eeaa11"
     ui_fgcolor = "#ffffff"
+    aws_hook_class = RedshiftHook

     def __init__(
         self,
         *,
         cluster_identifier: str,
-        aws_conn_id: str | None = "aws_default",
         wait_for_completion: bool = False,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         poll_interval: int = 30,
@@ -621,7 +647,6 @@ class RedshiftPauseClusterOperator(BaseOperator):
     ):
         super().__init__(**kwargs)
         self.cluster_identifier = cluster_identifier
-        self.aws_conn_id = aws_conn_id
         self.wait_for_completion = wait_for_completion
         self.deferrable = deferrable
         self.max_attempts = max_attempts
@@ -633,12 +658,11 @@ class RedshiftPauseClusterOperator(BaseOperator):
         self._attempt_interval = 15

     def execute(self, context: Context):
-        redshift_hook = RedshiftHook(aws_conn_id=self.aws_conn_id)
         while self._remaining_attempts:
             try:
-                redshift_hook.conn.pause_cluster(ClusterIdentifier=self.cluster_identifier)
+                self.hook.conn.pause_cluster(ClusterIdentifier=self.cluster_identifier)
                 break
-            except redshift_hook.conn.exceptions.InvalidClusterStateFault as error:
+            except self.hook.conn.exceptions.InvalidClusterStateFault as error:
                 self._remaining_attempts -= 1

                 if self._remaining_attempts:
@@ -650,7 +674,7 @@ class RedshiftPauseClusterOperator(BaseOperator):
                     raise error
         if self.wait_for_completion:
             if self.deferrable:
-                cluster_state = redshift_hook.cluster_status(cluster_identifier=self.cluster_identifier)
+                cluster_state = self.hook.cluster_status(cluster_identifier=self.cluster_identifier)
                 if cluster_state == "paused":
                     self.log.info("Paused cluster successfully")
                 elif cluster_state == "deleting":
@@ -671,7 +695,7 @@ class RedshiftPauseClusterOperator(BaseOperator):
                     timeout=timedelta(seconds=self.max_attempts * self.poll_interval + 60),
                 )
             else:
-                waiter = redshift_hook.get_waiter("cluster_paused")
+                waiter = self.hook.get_waiter("cluster_paused")
                 waiter.wait(
                     ClusterIdentifier=self.cluster_identifier,
                     WaiterConfig=prune_dict(
@@ -690,7 +714,7 @@ class RedshiftPauseClusterOperator(BaseOperator):
             self.log.info("Paused cluster successfully")


-class RedshiftDeleteClusterOperator(BaseOperator):
+class RedshiftDeleteClusterOperator(AwsBaseOperator[RedshiftHook]):
     """
     Delete an AWS Redshift cluster.

@@ -704,18 +728,24 @@ class RedshiftDeleteClusterOperator(BaseOperator):
     :param wait_for_completion: Whether wait for cluster deletion or not
         The default value is ``True``
     :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used. If
-        running Airflow in a distributed manner and aws_conn_id is None or
-        empty, then default boto3 configuration would be used (and must be
-        maintained on each worker node).
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
     :param poll_interval: Time (in seconds) to wait between two consecutive calls to check cluster state
     :param deferrable: Run operator in the deferrable mode.
     :param max_attempts: (Deferrable mode only) The maximum number of attempts to be made
     """

-    template_fields: Sequence[str] = ("cluster_identifier",)
+    template_fields: Sequence[str] = aws_template_fields(
+        "cluster_identifier",
+    )
     ui_color = "#eeaa11"
     ui_fgcolor = "#ffffff"
+    aws_hook_class = RedshiftHook

     def __init__(
         self,
@@ -724,7 +754,6 @@ class RedshiftDeleteClusterOperator(BaseOperator):
         skip_final_cluster_snapshot: bool = True,
         final_cluster_snapshot_identifier: str | None = None,
         wait_for_completion: bool = True,
-        aws_conn_id: str | None = "aws_default",
         poll_interval: int = 30,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         max_attempts: int = 30,
@@ -741,25 +770,23 @@ class RedshiftDeleteClusterOperator(BaseOperator):
         # is thrown. In such case, retrying
         self._attempts = 10
         self._attempt_interval = 15
-        self.redshift_hook = RedshiftHook(aws_conn_id=aws_conn_id)
-        self.aws_conn_id = aws_conn_id
         self.deferrable = deferrable
         self.max_attempts = max_attempts

     def execute(self, context: Context):
         while self._attempts:
             try:
-                self.redshift_hook.delete_cluster(
+                self.hook.delete_cluster(
                     cluster_identifier=self.cluster_identifier,
                     skip_final_cluster_snapshot=self.skip_final_cluster_snapshot,
                     final_cluster_snapshot_identifier=self.final_cluster_snapshot_identifier,
                 )
                 break
-            except self.redshift_hook.conn.exceptions.InvalidClusterStateFault:
+            except self.hook.conn.exceptions.InvalidClusterStateFault:
                 self._attempts -= 1

                 if self._attempts:
-                    current_state = self.redshift_hook.conn.describe_clusters(
+                    current_state = self.hook.conn.describe_clusters(
                         ClusterIdentifier=self.cluster_identifier
                     )["Clusters"][0]["ClusterStatus"]
                     self.log.error(
@@ -772,7 +799,7 @@ class RedshiftDeleteClusterOperator(BaseOperator):
             raise

         if self.deferrable:
-            cluster_state = self.redshift_hook.cluster_status(cluster_identifier=self.cluster_identifier)
+            cluster_state = self.hook.cluster_status(cluster_identifier=self.cluster_identifier)
             if cluster_state == "cluster_not_found":
                 self.log.info("Cluster deleted successfully")
             elif cluster_state in ("creating", "modifying"):
@@ -792,7 +819,7 @@ class RedshiftDeleteClusterOperator(BaseOperator):
             )

         elif self.wait_for_completion:
-            waiter = self.redshift_hook.conn.get_waiter("cluster_deleted")
+            waiter = self.hook.conn.get_waiter("cluster_deleted")
             waiter.wait(
                 ClusterIdentifier=self.cluster_identifier,
                 WaiterConfig={"Delay": self.poll_interval, "MaxAttempts": self.max_attempts},
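Because the pause, resume, and delete operators now share the same base class, a cluster-lifecycle DAG can pass the connection and region settings uniformly. A hedged sketch (cluster name and region are placeholders):

from airflow.providers.amazon.aws.operators.redshift_cluster import (
    RedshiftDeleteClusterOperator,
    RedshiftPauseClusterOperator,
    RedshiftResumeClusterOperator,
)

resume = RedshiftResumeClusterOperator(
    task_id="resume_cluster",
    cluster_identifier="example-cluster",
    region_name="us-east-1",
    wait_for_completion=True,
)

pause = RedshiftPauseClusterOperator(
    task_id="pause_cluster",
    cluster_identifier="example-cluster",
    region_name="us-east-1",
    deferrable=True,  # hand the wait off to the triggerer
)

delete = RedshiftDeleteClusterOperator(
    task_id="delete_cluster",
    cluster_identifier="example-cluster",
    skip_final_cluster_snapshot=True,
    wait_for_completion=True,
)

resume >> pause >> delete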
airflow/providers/amazon/aws/operators/sns.py

@@ -53,6 +53,10 @@ class SnsPublishOperator(AwsBaseOperator[SnsHook]):
         https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
     :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
         https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
+    :param message_deduplication_id: Every message must have a unique message_deduplication_id.
+        This parameter applies only to FIFO (first-in-first-out) topics.
+    :param message_group_id: Tag that specifies that a message belongs to a specific message group.
+        This parameter applies only to FIFO (first-in-first-out) topics.
     """

     aws_hook_class = SnsHook
@@ -61,6 +65,8 @@ class SnsPublishOperator(AwsBaseOperator[SnsHook]):
         "message",
         "subject",
         "message_attributes",
+        "message_deduplication_id",
+        "message_group_id",
     )
     template_fields_renderers = {"message_attributes": "json"}

@@ -71,6 +77,8 @@ class SnsPublishOperator(AwsBaseOperator[SnsHook]):
         message: str,
         subject: str | None = None,
         message_attributes: dict | None = None,
+        message_deduplication_id: str | None = None,
+        message_group_id: str | None = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -78,15 +86,19 @@ class SnsPublishOperator(AwsBaseOperator[SnsHook]):
         self.message = message
         self.subject = subject
         self.message_attributes = message_attributes
+        self.message_deduplication_id = message_deduplication_id
+        self.message_group_id = message_group_id

     def execute(self, context: Context):
         self.log.info(
-            "Sending SNS notification to %s using %s:\nsubject=%s\nattributes=%s\nmessage=%s",
+            "Sending SNS notification to %s using %s:\nsubject=%s\nattributes=%s\nmessage=%s\nmessage_deduplication_id=%s\nmessage_group_id=%s",
             self.target_arn,
             self.aws_conn_id,
             self.subject,
             self.message_attributes,
             self.message,
+            self.message_deduplication_id,
+            self.message_group_id,
         )

         return self.hook.publish_to_target(
@@ -94,4 +106,6 @@ class SnsPublishOperator(AwsBaseOperator[SnsHook]):
             message=self.message,
             subject=self.subject,
             message_attributes=self.message_attributes,
+            message_deduplication_id=self.message_deduplication_id,
+            message_group_id=self.message_group_id,
         )
airflow/providers/amazon/aws/sensors/redshift_cluster.py

@@ -18,21 +18,21 @@ from __future__ import annotations

 from collections.abc import Sequence
 from datetime import timedelta
-from functools import cached_property
 from typing import TYPE_CHECKING, Any

 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.redshift_cluster import RedshiftHook
+from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
 from airflow.providers.amazon.aws.triggers.redshift_cluster import RedshiftClusterTrigger
 from airflow.providers.amazon.aws.utils import validate_execute_complete_event
-from airflow.sensors.base import BaseSensorOperator
+from airflow.providers.amazon.aws.utils.mixins import aws_template_fields

 if TYPE_CHECKING:
     from airflow.utils.context import Context


-class RedshiftClusterSensor(BaseSensorOperator):
+class RedshiftClusterSensor(AwsBaseSensor[RedshiftHook]):
     """
     Waits for a Redshift cluster to reach a specific status.

@@ -43,23 +43,30 @@ class RedshiftClusterSensor(BaseSensorOperator):
     :param cluster_identifier: The identifier for the cluster being pinged.
     :param target_status: The cluster status desired.
     :param deferrable: Run operator in the deferrable mode.
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
     """

-    template_fields: Sequence[str] = ("cluster_identifier", "target_status")
+    template_fields: Sequence[str] = aws_template_fields("cluster_identifier", "target_status")
+    aws_hook_class = RedshiftHook

     def __init__(
         self,
         *,
         cluster_identifier: str,
         target_status: str = "available",
-        aws_conn_id: str | None = "aws_default",
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ):
         super().__init__(**kwargs)
         self.cluster_identifier = cluster_identifier
         self.target_status = target_status
-        self.aws_conn_id = aws_conn_id
         self.deferrable = deferrable

     def poke(self, context: Context) -> bool:
@@ -96,7 +103,3 @@ class RedshiftClusterSensor(BaseSensorOperator):
         if status == "success":
             self.log.info("%s completed successfully.", self.task_id)
             self.log.info("Cluster Identifier %s is in %s state", self.cluster_identifier, self.target_status)
-
-    @cached_property
-    def hook(self) -> RedshiftHook:
-        return RedshiftHook(aws_conn_id=self.aws_conn_id)
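After this refactor the sensor no longer needs its hand-written `hook` cached property: `AwsBaseSensor` constructs the hook from `aws_hook_class`, and the sensor gains the same `region_name`/`verify` kwargs as the operators. An illustrative wait task (identifier and region are placeholders):

from airflow.providers.amazon.aws.sensors.redshift_cluster import RedshiftClusterSensor

wait_for_cluster = RedshiftClusterSensor(
    task_id="wait_for_cluster_available",
    cluster_identifier="example-cluster",  # placeholder
    target_status="available",
    region_name="us-east-1",
    deferrable=True,  # poll from the triggerer instead of a worker slot
    poke_interval=60,  # standard sensor kwarg accepted via **kwargs
)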