apache-airflow-providers-amazon 7.4.1rc1__py3-none-any.whl → 8.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. airflow/providers/amazon/aws/hooks/athena.py +0 -15
  2. airflow/providers/amazon/aws/hooks/base_aws.py +98 -65
  3. airflow/providers/amazon/aws/hooks/batch_client.py +60 -27
  4. airflow/providers/amazon/aws/hooks/batch_waiters.py +3 -1
  5. airflow/providers/amazon/aws/hooks/emr.py +33 -74
  6. airflow/providers/amazon/aws/hooks/logs.py +22 -4
  7. airflow/providers/amazon/aws/hooks/redshift_cluster.py +1 -12
  8. airflow/providers/amazon/aws/hooks/sagemaker.py +0 -16
  9. airflow/providers/amazon/aws/links/emr.py +1 -3
  10. airflow/providers/amazon/aws/operators/athena.py +0 -15
  11. airflow/providers/amazon/aws/operators/batch.py +78 -24
  12. airflow/providers/amazon/aws/operators/ecs.py +21 -58
  13. airflow/providers/amazon/aws/operators/eks.py +0 -1
  14. airflow/providers/amazon/aws/operators/emr.py +94 -24
  15. airflow/providers/amazon/aws/operators/lambda_function.py +0 -19
  16. airflow/providers/amazon/aws/operators/rds.py +1 -1
  17. airflow/providers/amazon/aws/operators/redshift_cluster.py +22 -1
  18. airflow/providers/amazon/aws/operators/redshift_data.py +0 -62
  19. airflow/providers/amazon/aws/secrets/secrets_manager.py +0 -17
  20. airflow/providers/amazon/aws/secrets/systems_manager.py +0 -21
  21. airflow/providers/amazon/aws/sensors/dynamodb.py +97 -0
  22. airflow/providers/amazon/aws/sensors/emr.py +1 -2
  23. airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py +1 -1
  24. airflow/providers/amazon/aws/transfers/gcs_to_s3.py +0 -19
  25. airflow/providers/amazon/aws/transfers/glacier_to_gcs.py +1 -7
  26. airflow/providers/amazon/aws/transfers/google_api_to_s3.py +10 -10
  27. airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py +0 -10
  28. airflow/providers/amazon/aws/transfers/mongo_to_s3.py +0 -11
  29. airflow/providers/amazon/aws/transfers/s3_to_sftp.py +0 -10
  30. airflow/providers/amazon/aws/transfers/sql_to_s3.py +23 -9
  31. airflow/providers/amazon/aws/triggers/redshift_cluster.py +54 -2
  32. airflow/providers/amazon/aws/waiters/base_waiter.py +12 -1
  33. airflow/providers/amazon/aws/waiters/emr-serverless.json +18 -0
  34. airflow/providers/amazon/get_provider_info.py +35 -30
  35. {apache_airflow_providers_amazon-7.4.1rc1.dist-info → apache_airflow_providers_amazon-8.0.0rc2.dist-info}/METADATA +81 -4
  36. {apache_airflow_providers_amazon-7.4.1rc1.dist-info → apache_airflow_providers_amazon-8.0.0rc2.dist-info}/RECORD +41 -41
  37. airflow/providers/amazon/aws/operators/aws_lambda.py +0 -29
  38. airflow/providers/amazon/aws/operators/redshift_sql.py +0 -57
  39. {apache_airflow_providers_amazon-7.4.1rc1.dist-info → apache_airflow_providers_amazon-8.0.0rc2.dist-info}/LICENSE +0 -0
  40. {apache_airflow_providers_amazon-7.4.1rc1.dist-info → apache_airflow_providers_amazon-8.0.0rc2.dist-info}/NOTICE +0 -0
  41. {apache_airflow_providers_amazon-7.4.1rc1.dist-info → apache_airflow_providers_amazon-8.0.0rc2.dist-info}/WHEEL +0 -0
  42. {apache_airflow_providers_amazon-7.4.1rc1.dist-info → apache_airflow_providers_amazon-8.0.0rc2.dist-info}/entry_points.txt +0 -0
  43. {apache_airflow_providers_amazon-7.4.1rc1.dist-info → apache_airflow_providers_amazon-8.0.0rc2.dist-info}/top_level.txt +0 -0
airflow/providers/amazon/aws/hooks/redshift_cluster.py
@@ -17,7 +17,6 @@
  from __future__ import annotations
 
  import asyncio
- import warnings
  from typing import Any, Sequence
 
  import botocore.exceptions
@@ -177,22 +176,12 @@ class RedshiftHook(AwsBaseHook):
          )
          return response["Snapshot"] if response["Snapshot"] else None
 
-     def get_cluster_snapshot_status(self, snapshot_identifier: str, cluster_identifier: str | None = None):
+     def get_cluster_snapshot_status(self, snapshot_identifier: str):
          """
          Return Redshift cluster snapshot status. If cluster snapshot not found return ``None``
 
          :param snapshot_identifier: A unique identifier for the snapshot that you are requesting
-         :param cluster_identifier: (deprecated) The unique identifier of the cluster
-             the snapshot was created from
          """
-         if cluster_identifier:
-             warnings.warn(
-                 "Parameter `cluster_identifier` is deprecated."
-                 "This option will be removed in a future version.",
-                 DeprecationWarning,
-                 stacklevel=2,
-             )
-
          try:
              response = self.get_conn().describe_cluster_snapshots(
                  SnapshotIdentifier=snapshot_identifier,
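
Migration note: in 8.0.0, `get_cluster_snapshot_status` accepts only the snapshot identifier; the deprecated `cluster_identifier` argument is gone. A minimal sketch of the new call (the connection id and snapshot name are illustrative, not from this diff):

    from airflow.providers.amazon.aws.hooks.redshift_cluster import RedshiftHook

    hook = RedshiftHook(aws_conn_id="aws_default")
    # 7.x also accepted cluster_identifier=...; 8.x takes the snapshot identifier alone
    status = hook.get_cluster_snapshot_status(snapshot_identifier="my-snapshot")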
airflow/providers/amazon/aws/hooks/sagemaker.py
@@ -23,7 +23,6 @@ import re
  import tarfile
  import tempfile
  import time
- import warnings
  from collections import Counter
  from datetime import datetime
  from functools import partial
@@ -977,21 +976,6 @@ class SageMakerHook(AwsBaseHook):
          else:
              next_token = response["NextToken"]
 
-     def find_processing_job_by_name(self, processing_job_name: str) -> bool:
-         """
-         Query processing job by name
-
-         This method is deprecated.
-         Please use `airflow.providers.amazon.aws.hooks.sagemaker.count_processing_jobs_by_name`.
-         """
-         warnings.warn(
-             "This method is deprecated. "
-             "Please use `airflow.providers.amazon.aws.hooks.sagemaker.count_processing_jobs_by_name`.",
-             DeprecationWarning,
-             stacklevel=2,
-         )
-         return bool(self.count_processing_jobs_by_name(processing_job_name))
-
      @staticmethod
      def _name_matches_pattern(
          processing_job_name: str,
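
Migration note: `find_processing_job_by_name` is removed; `count_processing_jobs_by_name`, named in the old deprecation message, is the replacement. A sketch of equivalent usage (the job name is illustrative):

    from airflow.providers.amazon.aws.hooks.sagemaker import SageMakerHook

    hook = SageMakerHook(aws_conn_id="aws_default")
    # truthiness of the count reproduces the old boolean behavior
    job_exists = bool(hook.count_processing_jobs_by_name("my-processing-job"))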
airflow/providers/amazon/aws/links/emr.py
@@ -24,9 +24,7 @@ class EmrClusterLink(BaseAwsLink):
 
      name = "EMR Cluster"
      key = "emr_cluster"
-     format_str = (
-         BASE_AWS_CONSOLE_LINK + "/elasticmapreduce/home?region={region_name}#cluster-details:{job_flow_id}"
-     )
+     format_str = BASE_AWS_CONSOLE_LINK + "/emr/home?region={region_name}#/clusterDetails/{job_flow_id}"
 
 
  class EmrLogsLink(BaseAwsLink):
airflow/providers/amazon/aws/operators/athena.py
@@ -17,7 +17,6 @@
  # under the License.
  from __future__ import annotations
 
- import warnings
  from typing import TYPE_CHECKING, Any, Sequence
 
  from airflow.compat.functools import cached_property
@@ -45,7 +44,6 @@ class AthenaOperator(BaseOperator):
      :param query_execution_context: Context in which query need to be run
      :param result_configuration: Dict with path to store results in and config related to encryption
      :param sleep_time: Time (in seconds) to wait between two consecutive calls to check query status on Athena
-     :param max_tries: Deprecated - use max_polling_attempts instead.
      :param max_polling_attempts: Number of times to poll for query state before function exits
          To limit task execution time, use execution_timeout.
      :param log_query: Whether to log athena query and other execution params when it's executed.
@@ -69,7 +67,6 @@ class AthenaOperator(BaseOperator):
          query_execution_context: dict[str, str] | None = None,
          result_configuration: dict[str, Any] | None = None,
          sleep_time: int = 30,
-         max_tries: int | None = None,
          max_polling_attempts: int | None = None,
          log_query: bool = True,
          **kwargs: Any,
@@ -88,18 +85,6 @@ class AthenaOperator(BaseOperator):
          self.query_execution_id: str | None = None
          self.log_query: bool = log_query
 
-         if max_tries:
-             warnings.warn(
-                 f"Parameter `{self.__class__.__name__}.max_tries` is deprecated and will be removed "
-                 "in a future release. Please use method `max_polling_attempts` instead.",
-                 DeprecationWarning,
-                 stacklevel=2,
-             )
-             if max_polling_attempts and max_polling_attempts != max_tries:
-                 raise Exception("max_polling_attempts must be the same value as max_tries")
-             else:
-                 self.max_polling_attempts = max_tries
-
      @cached_property
      def hook(self) -> AthenaHook:
          """Create and return an AthenaHook."""
airflow/providers/amazon/aws/operators/batch.py
@@ -25,6 +25,7 @@ An Airflow operator for AWS Batch services
  """
  from __future__ import annotations
 
+ import warnings
  from typing import TYPE_CHECKING, Any, Sequence
 
  from airflow.compat.functools import cached_property
@@ -54,7 +55,9 @@ class BatchOperator(BaseOperator):
      :param job_name: the name for the job that will run on AWS Batch (templated)
      :param job_definition: the job definition name on AWS Batch
      :param job_queue: the queue name on AWS Batch
-     :param overrides: the `containerOverrides` parameter for boto3 (templated)
+     :param overrides: DEPRECATED, use container_overrides instead with the same value.
+     :param container_overrides: the `containerOverrides` parameter for boto3 (templated)
+     :param node_overrides: the `nodeOverrides` parameter for boto3 (templated)
      :param array_properties: the `arrayProperties` parameter for boto3
      :param parameters: the `parameters` for boto3 (templated)
      :param job_id: the job ID, usually unknown (None) until the
@@ -88,14 +91,19 @@ class BatchOperator(BaseOperator):
          "job_name",
          "job_definition",
          "job_queue",
-         "overrides",
+         "container_overrides",
          "array_properties",
+         "node_overrides",
          "parameters",
          "waiters",
          "tags",
          "wait_for_completion",
      )
-     template_fields_renderers = {"overrides": "json", "parameters": "json"}
+     template_fields_renderers = {
+         "container_overrides": "json",
+         "parameters": "json",
+         "node_overrides": "json",
+     }
 
      @property
      def operator_extra_links(self):
@@ -114,8 +122,10 @@ class BatchOperator(BaseOperator):
          job_name: str,
          job_definition: str,
          job_queue: str,
-         overrides: dict,
+         overrides: dict | None = None,  # deprecated
+         container_overrides: dict | None = None,
          array_properties: dict | None = None,
+         node_overrides: dict | None = None,
          parameters: dict | None = None,
          job_id: str | None = None,
          waiters: Any | None = None,
@@ -133,17 +143,43 @@ class BatchOperator(BaseOperator):
          self.job_name = job_name
          self.job_definition = job_definition
          self.job_queue = job_queue
-         self.overrides = overrides or {}
-         self.array_properties = array_properties or {}
+
+         self.container_overrides = container_overrides
+         # handle `overrides` deprecation in favor of `container_overrides`
+         if overrides:
+             if container_overrides:
+                 # disallow setting both old and new params
+                 raise AirflowException(
+                     "'container_overrides' replaces the 'overrides' parameter. "
+                     "You cannot specify both. Please remove assignation to the deprecated 'overrides'."
+                 )
+             self.container_overrides = overrides
+             warnings.warn(
+                 "Parameter `overrides` is deprecated, Please use `container_overrides` instead.",
+                 DeprecationWarning,
+                 stacklevel=2,
+             )
+
+         self.node_overrides = node_overrides
+         self.array_properties = array_properties
          self.parameters = parameters or {}
          self.waiters = waiters
          self.tags = tags or {}
          self.wait_for_completion = wait_for_completion
-         self.hook = BatchClientHook(
-             max_retries=max_retries,
-             status_retries=status_retries,
-             aws_conn_id=aws_conn_id,
-             region_name=region_name,
+
+         # params for hook
+         self.max_retries = max_retries
+         self.status_retries = status_retries
+         self.aws_conn_id = aws_conn_id
+         self.region_name = region_name
+
+     @cached_property
+     def hook(self) -> BatchClientHook:
+         return BatchClientHook(
+             max_retries=self.max_retries,
+             status_retries=self.status_retries,
+             aws_conn_id=self.aws_conn_id,
+             region_name=self.region_name,
          )
 
      def execute(self, context: Context):
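
Migration note: `BatchOperator(overrides=...)` still works but emits a `DeprecationWarning`, and passing both `overrides` and `container_overrides` raises. A sketch of the 8.x spelling (job names and the override dict are illustrative):

    from airflow.providers.amazon.aws.operators.batch import BatchOperator

    submit_job = BatchOperator(
        task_id="submit_job",
        job_name="my-job",
        job_definition="my-job-definition",
        job_queue="my-job-queue",
        container_overrides={"command": ["echo", "hello"]},  # was overrides= in 7.x
        # node_overrides={...} is new in 8.0.0 for multi-node parallel jobs
    )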
@@ -174,18 +210,27 @@ class BatchOperator(BaseOperator):
              self.job_definition,
              self.job_queue,
          )
-         self.log.info("AWS Batch job - container overrides: %s", self.overrides)
+
+         if self.container_overrides:
+             self.log.info("AWS Batch job - container overrides: %s", self.container_overrides)
+         if self.array_properties:
+             self.log.info("AWS Batch job - array properties: %s", self.array_properties)
+         if self.node_overrides:
+             self.log.info("AWS Batch job - node properties: %s", self.node_overrides)
+
+         args = {
+             "jobName": self.job_name,
+             "jobQueue": self.job_queue,
+             "jobDefinition": self.job_definition,
+             "arrayProperties": self.array_properties,
+             "parameters": self.parameters,
+             "tags": self.tags,
+             "containerOverrides": self.container_overrides,
+             "nodeOverrides": self.node_overrides,
+         }
 
          try:
-             response = self.hook.client.submit_job(
-                 jobName=self.job_name,
-                 jobQueue=self.job_queue,
-                 jobDefinition=self.job_definition,
-                 arrayProperties=self.array_properties,
-                 parameters=self.parameters,
-                 containerOverrides=self.overrides,
-                 tags=self.tags,
-             )
+             response = self.hook.client.submit_job(**trim_none_values(args))
          except Exception as e:
              self.log.error(
                  "AWS Batch job failed submission - job definition: %s - on queue %s",
@@ -249,15 +294,24 @@ class BatchOperator(BaseOperator):
          else:
              self.hook.wait_for_job(self.job_id)
 
-         awslogs = self.hook.get_job_awslogs_info(self.job_id)
+         awslogs = self.hook.get_job_all_awslogs_info(self.job_id)
          if awslogs:
-             self.log.info("AWS Batch job (%s) CloudWatch Events details found: %s", self.job_id, awslogs)
+             self.log.info("AWS Batch job (%s) CloudWatch Events details found. Links to logs:", self.job_id)
+             link_builder = CloudWatchEventsLink()
+             for log in awslogs:
+                 self.log.info(link_builder.format_link(**log))
+             if len(awslogs) > 1:
+                 # there can be several log streams on multi-node jobs
+                 self.log.warning(
+                     "out of all those logs, we can only link to one in the UI. Using the first one."
+                 )
+
              CloudWatchEventsLink.persist(
                  context=context,
                  operator=self,
                  region_name=self.hook.conn_region_name,
                  aws_partition=self.hook.conn_partition,
-                 **awslogs,
+                 **awslogs[0],
              )
 
          self.hook.check_job_success(self.job_id)
airflow/providers/amazon/aws/operators/ecs.py
@@ -19,7 +19,6 @@ from __future__ import annotations
 
  import re
  import sys
- import warnings
  from datetime import timedelta
  from typing import TYPE_CHECKING, Sequence
 
@@ -29,14 +28,12 @@ from airflow.compat.functools import cached_property
  from airflow.exceptions import AirflowException
  from airflow.models import BaseOperator, XCom
  from airflow.providers.amazon.aws.exceptions import EcsOperatorError, EcsTaskFailToStart
-
- # TODO: Remove the following import when EcsProtocol and EcsTaskLogFetcher deprecations are removed.
- from airflow.providers.amazon.aws.hooks import ecs
  from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
  from airflow.providers.amazon.aws.hooks.ecs import (
      EcsClusterStates,
      EcsHook,
      EcsTaskDefinitionStates,
+     EcsTaskLogFetcher,
      should_retry_eni,
  )
  from airflow.utils.helpers import prune_dict
@@ -388,6 +385,10 @@ class EcsRunTaskOperator(EcsBaseOperator):
          AirflowException if an ECS task is stopped (to receive Airflow alerts with the logs of what
          failed in the code running in ECS).
      :param wait_for_completion: If True, waits for creation of the cluster to complete. (default: True)
+     :param waiter_delay: The amount of time in seconds to wait between attempts,
+         if not set then the default waiter value will be used.
+     :param waiter_max_attempts: The maximum number of attempts to be made,
+         if not set then the default waiter value will be used.
      """
 
      ui_color = "#f0ede4"
@@ -443,6 +444,8 @@ class EcsRunTaskOperator(EcsBaseOperator):
          reattach: bool = False,
          number_logs_exception: int = 10,
          wait_for_completion: bool = True,
+         waiter_delay: int | None = None,
+         waiter_max_attempts: int | None = None,
          **kwargs,
      ):
          super().__init__(**kwargs)
@@ -474,6 +477,8 @@ class EcsRunTaskOperator(EcsBaseOperator):
          self.retry_args = quota_retry
          self.task_log_fetcher: EcsTaskLogFetcher | None = None
          self.wait_for_completion = wait_for_completion
+         self.waiter_delay = waiter_delay
+         self.waiter_max_attempts = waiter_max_attempts
 
      @provide_session
      def execute(self, context, session=None):
@@ -596,20 +601,28 @@ class EcsRunTaskOperator(EcsBaseOperator):
 
          waiter = self.client.get_waiter("tasks_stopped")
          waiter.config.max_attempts = sys.maxsize  # timeout is managed by airflow
-         waiter.wait(cluster=self.cluster, tasks=[self.arn])
+         waiter.wait(
+             cluster=self.cluster,
+             tasks=[self.arn],
+             WaiterConfig=prune_dict(
+                 {
+                     "Delay": self.waiter_delay,
+                     "MaxAttempts": self.waiter_max_attempts,
+                 }
+             ),
+         )
 
          return
 
      def _aws_logs_enabled(self):
          return self.awslogs_group and self.awslogs_stream_prefix
 
-     # TODO: When the deprecation wrapper below is removed, please fix the following return type hint.
-     def _get_task_log_fetcher(self) -> ecs.EcsTaskLogFetcher:
+     def _get_task_log_fetcher(self) -> EcsTaskLogFetcher:
          if not self.awslogs_group:
              raise ValueError("must specify awslogs_group to fetch task logs")
          log_stream_name = f"{self.awslogs_stream_prefix}/{self.ecs_task_id}"
 
-         return ecs.EcsTaskLogFetcher(
+         return EcsTaskLogFetcher(
              aws_conn_id=self.aws_conn_id,
              region_name=self.awslogs_region,
              log_group=self.awslogs_group,
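
Usage note: the new `waiter_delay` / `waiter_max_attempts` parameters feed the boto3 `WaiterConfig` via `prune_dict`, so leaving them unset keeps the waiter defaults. A sketch (cluster and task definition names are illustrative):

    from airflow.providers.amazon.aws.operators.ecs import EcsRunTaskOperator

    run_task = EcsRunTaskOperator(
        task_id="run_task",
        cluster="my-cluster",
        task_definition="my-task-definition",
        launch_type="FARGATE",
        overrides={},
        waiter_delay=30,          # poll every 30 seconds
        waiter_max_attempts=120,  # give up after 120 polls
    )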
@@ -679,53 +692,3 @@ class EcsRunTaskOperator(EcsBaseOperator):
              cluster=self.cluster, task=self.arn, reason="Task killed by the user"
          )
          self.log.info(response)
-
-
- class EcsOperator(EcsRunTaskOperator):
-     """
-     This operator is deprecated.
-     Please use :class:`airflow.providers.amazon.aws.operators.ecs.EcsRunTaskOperator`.
-     """
-
-     def __init__(self, *args, **kwargs):
-         warnings.warn(
-             "This operator is deprecated. "
-             "Please use `airflow.providers.amazon.aws.operators.ecs.EcsRunTaskOperator`.",
-             DeprecationWarning,
-             stacklevel=2,
-         )
-         super().__init__(*args, **kwargs)
-
-
- class EcsTaskLogFetcher(ecs.EcsTaskLogFetcher):
-     """
-     This class is deprecated.
-     Please use :class:`airflow.providers.amazon.aws.hooks.ecs.EcsTaskLogFetcher`.
-     """
-
-     # TODO: Note to deprecator, Be sure to fix the use of `ecs.EcsTaskLogFetcher`
-     # in the Operators above when you remove this wrapper class.
-     def __init__(self, *args, **kwargs):
-         warnings.warn(
-             "This class is deprecated. "
-             "Please use `airflow.providers.amazon.aws.hooks.ecs.EcsTaskLogFetcher`.",
-             DeprecationWarning,
-             stacklevel=2,
-         )
-         super().__init__(*args, **kwargs)
-
-
- class EcsProtocol(ecs.EcsProtocol):
-     """
-     This class is deprecated.
-     Please use :class:`airflow.providers.amazon.aws.hooks.ecs.EcsProtocol`.
-     """
-
-     # TODO: Note to deprecator, Be sure to fix the use of `ecs.EcsProtocol`
-     # in the Operators above when you remove this wrapper class.
-     def __init__(self):
-         warnings.warn(
-             "This class is deprecated. Please use `airflow.providers.amazon.aws.hooks.ecs.EcsProtocol`.",
-             DeprecationWarning,
-             stacklevel=2,
-         )
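
Migration note: the three ECS deprecation shims are gone; imports move exactly as the old warnings indicated. A sketch of the 8.x import paths:

    # 7.x -> 8.x:
    #   operators.ecs.EcsOperator        -> operators.ecs.EcsRunTaskOperator
    #   operators.ecs.EcsTaskLogFetcher  -> hooks.ecs.EcsTaskLogFetcher
    #   operators.ecs.EcsProtocol        -> hooks.ecs.EcsProtocol
    from airflow.providers.amazon.aws.hooks.ecs import EcsProtocol, EcsTaskLogFetcher
    from airflow.providers.amazon.aws.operators.ecs import EcsRunTaskOperator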
airflow/providers/amazon/aws/operators/eks.py
@@ -644,7 +644,6 @@ class EksPodOperator(KubernetesPodOperator):
      :param namespace: The namespace in which to execute the pod. (templated)
      :param pod_name: The unique name to give the pod. (templated)
      :param aws_profile: The named profile containing the credentials for the AWS CLI tool to use.
-     :param aws_profile: str
      :param region: Which AWS region the connection should use. (templated)
          If this is None or empty then the default boto3 behaviour is used.
      :param aws_conn_id: The Airflow connection used for AWS credentials. (templated)
airflow/providers/amazon/aws/operators/emr.py
@@ -1010,21 +1010,25 @@ class EmrServerlessStartJobOperator(BaseOperator):
          return response["jobRunId"]
 
 
- class EmrServerlessDeleteApplicationOperator(BaseOperator):
+ class EmrServerlessStopApplicationOperator(BaseOperator):
      """
-     Operator to delete EMR Serverless application
+     Operator to stop an EMR Serverless application
 
      .. seealso::
          For more information on how to use this operator, take a look at the guide:
-         :ref:`howto/operator:EmrServerlessDeleteApplicationOperator`
+         :ref:`howto/operator:EmrServerlessStopApplicationOperator`
 
-     :param application_id: ID of the EMR Serverless application to delete.
-     :param wait_for_completion: If true, wait for the Application to start before returning. Default to True
+     :param application_id: ID of the EMR Serverless application to stop.
+     :param wait_for_completion: If true, wait for the Application to stop before returning. Default to True
      :param aws_conn_id: AWS connection to use
      :param waiter_countdown: Total amount of time, in seconds, the operator will wait for
-         the application be deleted. Defaults to 25 minutes.
+         the application be stopped. Defaults to 5 minutes.
      :param waiter_check_interval_seconds: Number of seconds between polling the state of the application.
-         Defaults to 60 seconds.
+         Defaults to 30 seconds.
+     :param force_stop: If set to True, any job for that app that is not in a terminal state will be cancelled.
+         Otherwise, trying to stop an app with running jobs will return an error.
+         If you want to wait for the jobs to finish gracefully, use
+         :class:`airflow.providers.amazon.aws.sensors.emr.EmrServerlessJobSensor`
      """
 
      template_fields: Sequence[str] = ("application_id",)
@@ -1034,8 +1038,9 @@ class EmrServerlessDeleteApplicationOperator(BaseOperator):
          application_id: str,
          wait_for_completion: bool = True,
          aws_conn_id: str = "aws_default",
-         waiter_countdown: int = 25 * 60,
-         waiter_check_interval_seconds: int = 60,
+         waiter_countdown: int = 5 * 60,
+         waiter_check_interval_seconds: int = 30,
+         force_stop: bool = False,
          **kwargs,
      ):
          self.aws_conn_id = aws_conn_id
@@ -1043,6 +1048,7 @@ class EmrServerlessDeleteApplicationOperator(BaseOperator):
          self.wait_for_completion = wait_for_completion
          self.waiter_countdown = waiter_countdown
          self.waiter_check_interval_seconds = waiter_check_interval_seconds
+         self.force_stop = force_stop
          super().__init__(**kwargs)
 
      @cached_property
@@ -1052,30 +1058,94 @@ class EmrServerlessDeleteApplicationOperator(BaseOperator):
 
      def execute(self, context: Context) -> None:
          self.log.info("Stopping application: %s", self.application_id)
+
+         if self.force_stop:
+             self.hook.cancel_running_jobs(
+                 self.application_id,
+                 waiter_config={
+                     "Delay": self.waiter_check_interval_seconds,
+                     "MaxAttempts": self.waiter_countdown / self.waiter_check_interval_seconds,
+                 },
+             )
+
          self.hook.conn.stop_application(applicationId=self.application_id)
 
-         # This should be replaced with a boto waiter when available.
-         waiter(
-             get_state_callable=self.hook.conn.get_application,
-             get_state_args={
-                 "applicationId": self.application_id,
-             },
-             parse_response=["application", "state"],
-             desired_state=EmrServerlessHook.APPLICATION_FAILURE_STATES,
-             failure_states=set(),
-             object_type="application",
-             action="stopped",
-             countdown=self.waiter_countdown,
-             check_interval_seconds=self.waiter_check_interval_seconds,
+         if self.wait_for_completion:
+             # This should be replaced with a boto waiter when available.
+             waiter(
+                 get_state_callable=self.hook.conn.get_application,
+                 get_state_args={
+                     "applicationId": self.application_id,
+                 },
+                 parse_response=["application", "state"],
+                 desired_state=EmrServerlessHook.APPLICATION_FAILURE_STATES,
+                 failure_states=set(),
+                 object_type="application",
+                 action="stopped",
+                 countdown=self.waiter_countdown,
+                 check_interval_seconds=self.waiter_check_interval_seconds,
+             )
+             self.log.info("EMR serverless application %s stopped successfully", self.application_id)
+
+
+ class EmrServerlessDeleteApplicationOperator(EmrServerlessStopApplicationOperator):
+     """
+     Operator to delete EMR Serverless application
+
+     .. seealso::
+         For more information on how to use this operator, take a look at the guide:
+         :ref:`howto/operator:EmrServerlessDeleteApplicationOperator`
+
+     :param application_id: ID of the EMR Serverless application to delete.
+     :param wait_for_completion: If true, wait for the Application to be deleted before returning.
+         Defaults to True. Note that this operator will always wait for the application to be STOPPED first.
+     :param aws_conn_id: AWS connection to use
+     :param waiter_countdown: Total amount of time, in seconds, the operator will wait for each step of first,
+         the application to be stopped, and then deleted. Defaults to 25 minutes.
+     :param waiter_check_interval_seconds: Number of seconds between polling the state of the application.
+         Defaults to 60 seconds.
+     :param force_stop: If set to True, any job for that app that is not in a terminal state will be cancelled.
+         Otherwise, trying to delete an app with running jobs will return an error.
+         If you want to wait for the jobs to finish gracefully, use
+         :class:`airflow.providers.amazon.aws.sensors.emr.EmrServerlessJobSensor`
+     """
+
+     template_fields: Sequence[str] = ("application_id",)
+
+     def __init__(
+         self,
+         application_id: str,
+         wait_for_completion: bool = True,
+         aws_conn_id: str = "aws_default",
+         waiter_countdown: int = 25 * 60,
+         waiter_check_interval_seconds: int = 60,
+         force_stop: bool = False,
+         **kwargs,
+     ):
+         self.wait_for_delete_completion = wait_for_completion
+         # super stops the app
+         super().__init__(
+             application_id=application_id,
+             # when deleting an app, we always need to wait for it to stop before we can call delete()
+             wait_for_completion=True,
+             aws_conn_id=aws_conn_id,
+             waiter_countdown=waiter_countdown,
+             waiter_check_interval_seconds=waiter_check_interval_seconds,
+             force_stop=force_stop,
+             **kwargs,
          )
 
-         self.log.info("Deleting application: %s", self.application_id)
+     def execute(self, context: Context) -> None:
+         # super stops the app (or makes sure it's already stopped)
+         super().execute(context)
+
+         self.log.info("Now deleting application: %s", self.application_id)
          response = self.hook.conn.delete_application(applicationId=self.application_id)
 
          if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
              raise AirflowException(f"Application deletion failed: {response}")
 
-         if self.wait_for_completion:
+         if self.wait_for_delete_completion:
              # This should be replaced with a boto waiter when available.
              waiter(
                  get_state_callable=self.hook.conn.get_application,
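
Usage note: deletion is now stop-then-delete, and `force_stop` cancels non-terminal jobs first. A sketch (the application id is illustrative):

    from airflow.providers.amazon.aws.operators.emr import (
        EmrServerlessDeleteApplicationOperator,
        EmrServerlessStopApplicationOperator,
    )

    stop_app = EmrServerlessStopApplicationOperator(
        task_id="stop_app",
        application_id="my-app-id",
        force_stop=True,  # cancel running jobs instead of erroring out
    )

    delete_app = EmrServerlessDeleteApplicationOperator(
        task_id="delete_app",
        application_id="my-app-id",
        # always waits for STOPPED first; wait_for_completion applies to the delete step
    )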
airflow/providers/amazon/aws/operators/lambda_function.py
@@ -18,7 +18,6 @@
  from __future__ import annotations
 
  import json
- import warnings
  from typing import TYPE_CHECKING, Sequence
 
  from airflow.compat.functools import cached_property
@@ -195,21 +194,3 @@ class LambdaInvokeFunctionOperator(BaseOperator):
          )
          self.log.info("Lambda function invocation succeeded: %r", response.get("ResponseMetadata"))
          return payload
-
-
- class AwsLambdaInvokeFunctionOperator(LambdaInvokeFunctionOperator):
-     """
-     This class is deprecated.
-     Please use
-     :class:`airflow.providers.amazon.aws.operators.lambda_function.LambdaInvokeFunctionOperator`.
-     """
-
-     def __init__(self, *args, **kwargs):
-         warnings.warn(
-             "This class is deprecated."
-             "Please use"
-             "`airflow.providers.amazon.aws.operators.lambda_function.LambdaInvokeFunctionOperator`.",
-             DeprecationWarning,
-             stacklevel=2,
-         )
-         super().__init__(*args, **kwargs)
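
Migration note: the `AwsLambdaInvokeFunctionOperator` shim is removed along with the deprecated `aws_lambda` module (see the file list above). A sketch of the 8.x import and call (function name and payload are illustrative):

    from airflow.providers.amazon.aws.operators.lambda_function import LambdaInvokeFunctionOperator

    invoke = LambdaInvokeFunctionOperator(
        task_id="invoke_lambda",
        function_name="my-function",
        payload='{"key": "value"}',
    )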
airflow/providers/amazon/aws/operators/rds.py
@@ -80,7 +80,7 @@ class RdsCreateDbSnapshotOperator(RdsBaseOperator):
          db_snapshot_identifier: str,
          tags: Sequence[TagTypeDef] | dict | None = None,
          wait_for_completion: bool = True,
-         aws_conn_id: str = "aws_conn_id",
+         aws_conn_id: str = "aws_default",
          **kwargs,
      ):
          super().__init__(aws_conn_id=aws_conn_id, **kwargs)
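
Note: 8.0.0 fixes the default connection id from the nonexistent "aws_conn_id" to the standard "aws_default", so omitting the argument now resolves correctly. A sketch (identifiers are illustrative):

    from airflow.providers.amazon.aws.operators.rds import RdsCreateDbSnapshotOperator

    create_snapshot = RdsCreateDbSnapshotOperator(
        task_id="create_snapshot",
        db_type="instance",
        db_identifier="my-db",
        db_snapshot_identifier="my-db-snapshot",
        # aws_conn_id now defaults to "aws_default"
    )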