apache-airflow-providers-amazon 9.4.0__py3-none-any.whl → 9.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. airflow/providers/amazon/__init__.py +1 -1
  2. airflow/providers/amazon/aws/auth_manager/avp/entities.py +3 -1
  3. airflow/providers/amazon/aws/auth_manager/avp/facade.py +1 -1
  4. airflow/providers/amazon/aws/auth_manager/aws_auth_manager.py +80 -110
  5. airflow/providers/amazon/aws/auth_manager/router/login.py +11 -4
  6. airflow/providers/amazon/aws/auth_manager/user.py +7 -4
  7. airflow/providers/amazon/aws/executors/ecs/ecs_executor.py +1 -1
  8. airflow/providers/amazon/aws/hooks/appflow.py +5 -15
  9. airflow/providers/amazon/aws/hooks/athena_sql.py +2 -2
  10. airflow/providers/amazon/aws/hooks/base_aws.py +34 -1
  11. airflow/providers/amazon/aws/hooks/batch_client.py +1 -2
  12. airflow/providers/amazon/aws/hooks/batch_waiters.py +11 -3
  13. airflow/providers/amazon/aws/hooks/dms.py +3 -1
  14. airflow/providers/amazon/aws/hooks/ec2.py +1 -1
  15. airflow/providers/amazon/aws/hooks/eks.py +3 -6
  16. airflow/providers/amazon/aws/hooks/glue.py +6 -2
  17. airflow/providers/amazon/aws/hooks/logs.py +2 -2
  18. airflow/providers/amazon/aws/hooks/mwaa.py +79 -15
  19. airflow/providers/amazon/aws/hooks/redshift_cluster.py +10 -10
  20. airflow/providers/amazon/aws/hooks/redshift_data.py +3 -4
  21. airflow/providers/amazon/aws/hooks/s3.py +3 -1
  22. airflow/providers/amazon/aws/hooks/sagemaker.py +2 -2
  23. airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py +188 -0
  24. airflow/providers/amazon/aws/links/athena.py +1 -2
  25. airflow/providers/amazon/aws/links/base_aws.py +8 -1
  26. airflow/providers/amazon/aws/links/sagemaker_unified_studio.py +27 -0
  27. airflow/providers/amazon/aws/log/cloudwatch_task_handler.py +174 -54
  28. airflow/providers/amazon/aws/log/s3_task_handler.py +136 -84
  29. airflow/providers/amazon/aws/notifications/chime.py +1 -2
  30. airflow/providers/amazon/aws/notifications/sns.py +1 -1
  31. airflow/providers/amazon/aws/notifications/sqs.py +1 -1
  32. airflow/providers/amazon/aws/operators/ec2.py +91 -83
  33. airflow/providers/amazon/aws/operators/eks.py +3 -3
  34. airflow/providers/amazon/aws/operators/mwaa.py +73 -2
  35. airflow/providers/amazon/aws/operators/redshift_cluster.py +10 -3
  36. airflow/providers/amazon/aws/operators/s3.py +147 -157
  37. airflow/providers/amazon/aws/operators/sagemaker.py +4 -7
  38. airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py +155 -0
  39. airflow/providers/amazon/aws/sensors/ec2.py +5 -12
  40. airflow/providers/amazon/aws/sensors/emr.py +1 -1
  41. airflow/providers/amazon/aws/sensors/glacier.py +1 -1
  42. airflow/providers/amazon/aws/sensors/mwaa.py +161 -0
  43. airflow/providers/amazon/aws/sensors/rds.py +10 -5
  44. airflow/providers/amazon/aws/sensors/s3.py +32 -43
  45. airflow/providers/amazon/aws/sensors/sagemaker_unified_studio.py +73 -0
  46. airflow/providers/amazon/aws/sensors/step_function.py +2 -1
  47. airflow/providers/amazon/aws/transfers/mongo_to_s3.py +2 -2
  48. airflow/providers/amazon/aws/transfers/redshift_to_s3.py +19 -4
  49. airflow/providers/amazon/aws/transfers/s3_to_redshift.py +19 -3
  50. airflow/providers/amazon/aws/transfers/sql_to_s3.py +1 -1
  51. airflow/providers/amazon/aws/triggers/README.md +4 -4
  52. airflow/providers/amazon/aws/triggers/base.py +11 -2
  53. airflow/providers/amazon/aws/triggers/ecs.py +6 -2
  54. airflow/providers/amazon/aws/triggers/eks.py +2 -2
  55. airflow/providers/amazon/aws/triggers/glue.py +1 -1
  56. airflow/providers/amazon/aws/triggers/mwaa.py +128 -0
  57. airflow/providers/amazon/aws/triggers/s3.py +31 -6
  58. airflow/providers/amazon/aws/triggers/sagemaker.py +2 -2
  59. airflow/providers/amazon/aws/triggers/sagemaker_unified_studio.py +66 -0
  60. airflow/providers/amazon/aws/triggers/sqs.py +11 -3
  61. airflow/providers/amazon/aws/{auth_manager/security_manager/__init__.py → utils/sagemaker_unified_studio.py} +12 -0
  62. airflow/providers/amazon/aws/utils/waiter_with_logging.py +4 -3
  63. airflow/providers/amazon/aws/waiters/mwaa.json +36 -0
  64. airflow/providers/amazon/get_provider_info.py +46 -5
  65. {apache_airflow_providers_amazon-9.4.0.dist-info → apache_airflow_providers_amazon-9.5.0.dist-info}/METADATA +38 -31
  66. {apache_airflow_providers_amazon-9.4.0.dist-info → apache_airflow_providers_amazon-9.5.0.dist-info}/RECORD +68 -61
  67. {apache_airflow_providers_amazon-9.4.0.dist-info → apache_airflow_providers_amazon-9.5.0.dist-info}/WHEEL +1 -1
  68. airflow/providers/amazon/aws/auth_manager/security_manager/aws_security_manager_override.py +0 -40
  69. {apache_airflow_providers_amazon-9.4.0.dist-info → apache_airflow_providers_amazon-9.5.0.dist-info}/entry_points.txt +0 -0

airflow/providers/amazon/aws/sensors/s3.py
@@ -23,7 +23,6 @@ import os
  import re
  from collections.abc import Sequence
  from datetime import datetime, timedelta
- from functools import cached_property
  from typing import TYPE_CHECKING, Any, Callable, cast

  from airflow.configuration import conf
@@ -34,11 +33,13 @@ if TYPE_CHECKING:

  from airflow.exceptions import AirflowException
  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+ from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
  from airflow.providers.amazon.aws.triggers.s3 import S3KeysUnchangedTrigger, S3KeyTrigger
- from airflow.sensors.base import BaseSensorOperator, poke_mode_only
+ from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
+ from airflow.sensors.base import poke_mode_only


- class S3KeySensor(BaseSensorOperator):
+ class S3KeySensor(AwsBaseSensor[S3Hook]):
  """
  Waits for one or multiple keys (a file-like instance on S3) to be present in a S3 bucket.

@@ -65,17 +66,6 @@ class S3KeySensor(BaseSensorOperator):

  def check_fn(files: List, **kwargs) -> bool:
  return any(f.get('Size', 0) > 1048576 for f in files)
- :param aws_conn_id: a reference to the s3 connection
- :param verify: Whether to verify SSL certificates for S3 connection.
- By default, SSL certificates are verified.
- You can provide the following values:
-
- - ``False``: do not validate SSL certificates. SSL will still be used
- (unless use_ssl is False), but SSL certificates will not be
- verified.
- - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
- You can specify this argument if you want to use a different
- CA cert bundle than the one used by botocore.
  :param deferrable: Run operator in the deferrable mode
  :param use_regex: whether to use regex to check bucket
  :param metadata_keys: List of head_object attributes to gather and send to ``check_fn``.
@@ -83,9 +73,18 @@ class S3KeySensor(BaseSensorOperator):
  all available attributes.
  Default value: "Size".
  If the requested attribute is not found, the key is still included and the value is None.
+ :param aws_conn_id: The Airflow connection used for AWS credentials.
+ If this is ``None`` or empty then the default boto3 behaviour is used. If
+ running Airflow in a distributed manner and aws_conn_id is None or
+ empty, then default boto3 configuration would be used (and must be
+ maintained on each worker node).
+ :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+ :param verify: Whether or not to verify SSL certificates. See:
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
  """

- template_fields: Sequence[str] = ("bucket_key", "bucket_name")
+ template_fields: Sequence[str] = aws_template_fields("bucket_key", "bucket_name")
+ aws_hook_class = S3Hook

  def __init__(
  self,
@@ -94,7 +93,6 @@ class S3KeySensor(BaseSensorOperator):
  bucket_name: str | None = None,
  wildcard_match: bool = False,
  check_fn: Callable[..., bool] | None = None,
- aws_conn_id: str | None = "aws_default",
  verify: str | bool | None = None,
  deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
  use_regex: bool = False,
@@ -106,14 +104,13 @@ class S3KeySensor(BaseSensorOperator):
  self.bucket_key = bucket_key
  self.wildcard_match = wildcard_match
  self.check_fn = check_fn
- self.aws_conn_id = aws_conn_id
  self.verify = verify
  self.deferrable = deferrable
  self.use_regex = use_regex
  self.metadata_keys = metadata_keys if metadata_keys else ["Size"]

  def _check_key(self, key, context: Context):
- bucket_name, key = S3Hook.get_s3_bucket_key(self.bucket_name, key, "bucket_name", "bucket_key")
+ bucket_name, key = self.hook.get_s3_bucket_key(self.bucket_name, key, "bucket_name", "bucket_key")
  self.log.info("Poking for key : s3://%s/%s", bucket_name, key)

  """
@@ -195,11 +192,13 @@ class S3KeySensor(BaseSensorOperator):
  self.defer(
  timeout=timedelta(seconds=self.timeout),
  trigger=S3KeyTrigger(
- bucket_name=cast(str, self.bucket_name),
+ bucket_name=cast("str", self.bucket_name),
  bucket_key=self.bucket_key,
  wildcard_match=self.wildcard_match,
  aws_conn_id=self.aws_conn_id,
+ region_name=self.region_name,
  verify=self.verify,
+ botocore_config=self.botocore_config,
  poke_interval=self.poke_interval,
  should_check_fn=bool(self.check_fn),
  use_regex=self.use_regex,
@@ -220,13 +219,9 @@ class S3KeySensor(BaseSensorOperator):
  elif event["status"] == "error":
  raise AirflowException(event["message"])

- @cached_property
- def hook(self) -> S3Hook:
- return S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
-

  @poke_mode_only
- class S3KeysUnchangedSensor(BaseSensorOperator):
+ class S3KeysUnchangedSensor(AwsBaseSensor[S3Hook]):
  """
  Return True if inactivity_period has passed with no increase in the number of objects matching prefix.

@@ -239,17 +234,7 @@ class S3KeysUnchangedSensor(BaseSensorOperator):

  :param bucket_name: Name of the S3 bucket
  :param prefix: The prefix being waited on. Relative path from bucket root level.
- :param aws_conn_id: a reference to the s3 connection
- :param verify: Whether or not to verify SSL certificates for S3 connection.
- By default SSL certificates are verified.
- You can provide the following values:
-
- - ``False``: do not validate SSL certificates. SSL will still be used
- (unless use_ssl is False), but SSL certificates will not be
- verified.
- - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
- You can specify this argument if you want to use a different
- CA cert bundle than the one used by botocore.
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
  :param inactivity_period: The total seconds of inactivity to designate
  keys unchanged. Note, this mechanism is not real time and
  this operator may not return until a poke_interval after this period
@@ -261,16 +246,24 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
  between pokes valid behavior. If true a warning message will be logged
  when this happens. If false an error will be raised.
  :param deferrable: Run sensor in the deferrable mode
+ :param aws_conn_id: The Airflow connection used for AWS credentials.
+ If this is ``None`` or empty then the default boto3 behaviour is used. If
+ running Airflow in a distributed manner and aws_conn_id is None or
+ empty, then default boto3 configuration would be used (and must be
+ maintained on each worker node).
+ :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+ :param verify: Whether or not to verify SSL certificates. See:
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
  """

- template_fields: Sequence[str] = ("bucket_name", "prefix")
+ template_fields: Sequence[str] = aws_template_fields("bucket_name", "prefix")
+ aws_hook_class = S3Hook

  def __init__(
  self,
  *,
  bucket_name: str,
  prefix: str,
- aws_conn_id: str | None = "aws_default",
  verify: bool | str | None = None,
  inactivity_period: float = 60 * 60,
  min_objects: int = 1,
@@ -291,15 +284,9 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
  self.inactivity_seconds = 0
  self.allow_delete = allow_delete
  self.deferrable = deferrable
- self.aws_conn_id = aws_conn_id
  self.verify = verify
  self.last_activity_time: datetime | None = None

- @cached_property
- def hook(self):
- """Returns S3Hook."""
- return S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
-
  def is_keys_unchanged(self, current_objects: set[str]) -> bool:
  """
  Check for new objects after the inactivity_period and update the sensor state accordingly.
@@ -382,7 +369,9 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
  inactivity_seconds=self.inactivity_seconds,
  allow_delete=self.allow_delete,
  aws_conn_id=self.aws_conn_id,
+ region_name=self.region_name,
  verify=self.verify,
+ botocore_config=self.botocore_config,
  last_activity_time=self.last_activity_time,
  ),
  method_name="execute_complete",
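
With both S3 sensors now subclassing `AwsBaseSensor[S3Hook]`, the connection, region, and botocore settings come from the shared AWS base sensor instead of sensor-specific arguments. A minimal usage sketch under that assumption (bucket, key, and connection values are placeholders, not taken from the package):

```python
from airflow.providers.amazon.aws.sensors.s3 import S3KeySensor

# Hypothetical task definition; the keyword set (aws_conn_id, region_name, verify)
# is assumed to be provided by AwsBaseSensor in 9.5.0.
wait_for_report = S3KeySensor(
    task_id="wait_for_report",
    bucket_name="example-bucket",            # placeholder bucket
    bucket_key="reports/{{ ds }}/_SUCCESS",  # placeholder key
    aws_conn_id="aws_default",
    region_name="us-east-1",
    deferrable=True,
)
```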

airflow/providers/amazon/aws/sensors/sagemaker_unified_studio.py
@@ -0,0 +1,73 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+
+ """This module contains the Amazon SageMaker Unified Studio Notebook sensor."""
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
+ from airflow.exceptions import AirflowException
+ from airflow.providers.amazon.aws.hooks.sagemaker_unified_studio import (
+ SageMakerNotebookHook,
+ )
+ from airflow.sensors.base import BaseSensorOperator
+
+ if TYPE_CHECKING:
+ from airflow.utils.context import Context
+
+
+ class SageMakerNotebookSensor(BaseSensorOperator):
+ """
+ Waits for a Sagemaker Workflows Notebook execution to reach any of the status below.
+
+ 'FAILED', 'STOPPED', 'COMPLETED'
+
+ :param execution_id: The Sagemaker Workflows Notebook running execution identifier
+ :param execution_name: The Sagemaker Workflows Notebook unique execution name
+ """
+
+ def __init__(self, *, execution_id: str, execution_name: str, **kwargs):
+ super().__init__(**kwargs)
+ self.execution_id = execution_id
+ self.execution_name = execution_name
+ self.success_state = ["COMPLETED"]
+ self.in_progress_states = ["PENDING", "RUNNING"]
+
+ def hook(self):
+ return SageMakerNotebookHook(execution_name=self.execution_name)
+
+ # override from base sensor
+ def poke(self, context=None):
+ status = self.hook().get_execution_status(execution_id=self.execution_id)
+
+ if status in self.success_state:
+ log_info_message = f"Exiting Execution {self.execution_id} State: {status}"
+ self.log.info(log_info_message)
+ return True
+ elif status in self.in_progress_states:
+ return False
+ else:
+ error_message = f"Exiting Execution {self.execution_id} State: {status}"
+ self.log.info(error_message)
+ raise AirflowException(error_message)
+
+ def execute(self, context: Context):
+ # This will invoke poke method in the base sensor
+ log_info_message = f"Polling Sagemaker Workflows Artifact execution: {self.execution_name} and execution id: {self.execution_id}"
+ self.log.info(log_info_message)
+ super().execute(context=context)
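
A short usage sketch for the new sensor; the execution identifiers are placeholders and the wiring to an upstream notebook task is illustrative only:

```python
from airflow.providers.amazon.aws.sensors.sagemaker_unified_studio import SageMakerNotebookSensor

# Hypothetical values; in practice these would come from the notebook execution
# started earlier in the DAG.
wait_for_notebook = SageMakerNotebookSensor(
    task_id="wait_for_notebook",
    execution_id="execution-1234567890",  # placeholder execution id
    execution_name="my-notebook-run",     # placeholder execution name
    poke_interval=60,
)
```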

airflow/providers/amazon/aws/sensors/step_function.py
@@ -81,5 +81,6 @@ class StepFunctionExecutionSensor(AwsBaseSensor[StepFunctionHook]):
  return False

  self.log.info("Doing xcom_push of output")
- self.xcom_push(context, "output", output)
+
+ context["ti"].xcom_push(key="output", value=output)
  return True
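
The sensor now pushes its result through the task instance available in `context` rather than the operator-level `xcom_push` helper. The same pattern in a custom sensor might look like this sketch (class and payload are illustrative):

```python
from airflow.sensors.base import BaseSensorOperator


class ExampleOutputSensor(BaseSensorOperator):
    """Illustrative only: pushes an XCom via the TaskInstance in ``context``."""

    def poke(self, context):
        output = {"status": "done"}  # placeholder payload
        # Push through the TaskInstance instead of self.xcom_push(...).
        context["ti"].xcom_push(key="output", value=output)
        return True
```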

airflow/providers/amazon/aws/transfers/mongo_to_s3.py
@@ -103,7 +103,7 @@ class MongoToS3Operator(BaseOperator):
  if self.is_pipeline:
  results: CommandCursor[Any] | Cursor = MongoHook(self.mongo_conn_id).aggregate(
  mongo_collection=self.mongo_collection,
- aggregate_query=cast(list, self.mongo_query),
+ aggregate_query=cast("list", self.mongo_query),
  mongo_db=self.mongo_db,
  allowDiskUse=self.allow_disk_use,
  )
@@ -111,7 +111,7 @@ class MongoToS3Operator(BaseOperator):
  else:
  results = MongoHook(self.mongo_conn_id).find(
  mongo_collection=self.mongo_collection,
- query=cast(dict, self.mongo_query),
+ query=cast("dict", self.mongo_query),
  projection=self.mongo_projection,
  mongo_db=self.mongo_db,
  find_one=False,

airflow/providers/amazon/aws/transfers/redshift_to_s3.py
@@ -29,6 +29,7 @@ from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook
  from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook
  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
  from airflow.providers.amazon.aws.utils.redshift import build_credentials_block
+ from airflow.utils.types import NOTSET, ArgNotSet

  if TYPE_CHECKING:
  from airflow.utils.context import Context
@@ -102,7 +103,7 @@ class RedshiftToS3Operator(BaseOperator):
  table: str | None = None,
  select_query: str | None = None,
  redshift_conn_id: str = "redshift_default",
- aws_conn_id: str | None = "aws_default",
+ aws_conn_id: str | None | ArgNotSet = NOTSET,
  verify: bool | str | None = None,
  unload_options: list | None = None,
  autocommit: bool = False,
@@ -118,7 +119,6 @@
  self.schema = schema
  self.table = table
  self.redshift_conn_id = redshift_conn_id
- self.aws_conn_id = aws_conn_id
  self.verify = verify
  self.unload_options = unload_options or []
  self.autocommit = autocommit
@@ -127,6 +127,16 @@
  self.table_as_file_name = table_as_file_name
  self.redshift_data_api_kwargs = redshift_data_api_kwargs or {}
  self.select_query = select_query
+ # In execute() we attempt to fetch this aws connection to check for extras. If the user didn't
+ # actually provide a connection note that, because we don't want to let the exception bubble up in
+ # that case (since we're silently injecting a connection on their behalf).
+ self._aws_conn_id: str | None
+ if isinstance(aws_conn_id, ArgNotSet):
+ self.conn_set = False
+ self._aws_conn_id = "aws_default"
+ else:
+ self.conn_set = True
+ self._aws_conn_id = aws_conn_id

  def _build_unload_query(
  self, credentials_block: str, select_query: str, s3_key: str, unload_options: str
@@ -176,11 +186,16 @@
  raise AirflowException(f"Cannot include param '{arg}' in Redshift Data API kwargs")
  else:
  redshift_sql_hook = RedshiftSQLHook(redshift_conn_id=self.redshift_conn_id)
- conn = S3Hook.get_connection(conn_id=self.aws_conn_id) if self.aws_conn_id else None
+ conn = (
+ S3Hook.get_connection(conn_id=self._aws_conn_id)
+ # Only fetch the connection if it was set by the user and it is not None
+ if self.conn_set and self._aws_conn_id
+ else None
+ )
  if conn and conn.extra_dejson.get("role_arn", False):
  credentials_block = f"aws_iam_role={conn.extra_dejson['role_arn']}"
  else:
- s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
+ s3_hook = S3Hook(aws_conn_id=self._aws_conn_id, verify=self.verify)
  credentials = s3_hook.get_credentials()
  credentials_block = build_credentials_block(credentials)

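
The `NOTSET` sentinel lets the operator distinguish "the caller never passed `aws_conn_id`" from an explicit `None`, so connection extras are only looked up when the user really configured a connection. A stripped-down sketch of that pattern in isolation (the class and `get_connection` callable are illustrative, not the operator's code):

```python
from __future__ import annotations

from airflow.utils.types import NOTSET, ArgNotSet


class ExampleTransfer:
    """Illustrative only: mirrors the aws_conn_id handling shown in the diff above."""

    def __init__(self, aws_conn_id: str | None | ArgNotSet = NOTSET):
        if isinstance(aws_conn_id, ArgNotSet):
            # Argument not passed at all: silently fall back to the default
            # connection, but remember that the user never asked for it.
            self.conn_set = False
            self._aws_conn_id = "aws_default"
        else:
            self.conn_set = True
            self._aws_conn_id = aws_conn_id

    def lookup_extras(self, get_connection):
        # Only fetch the connection when it was explicitly configured and not None.
        return get_connection(self._aws_conn_id) if self.conn_set and self._aws_conn_id else None
```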

airflow/providers/amazon/aws/transfers/s3_to_redshift.py
@@ -25,6 +25,7 @@ from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook
  from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook
  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
  from airflow.providers.amazon.aws.utils.redshift import build_credentials_block
+ from airflow.utils.types import NOTSET, ArgNotSet

  if TYPE_CHECKING:
  from airflow.utils.context import Context
@@ -93,7 +94,7 @@ class S3ToRedshiftOperator(BaseOperator):
  s3_key: str,
  schema: str | None = None,
  redshift_conn_id: str = "redshift_default",
- aws_conn_id: str | None = "aws_default",
+ aws_conn_id: str | None | ArgNotSet = NOTSET,
  verify: bool | str | None = None,
  column_list: list[str] | None = None,
  copy_options: list | None = None,
@@ -117,6 +118,16 @@
  self.method = method
  self.upsert_keys = upsert_keys
  self.redshift_data_api_kwargs = redshift_data_api_kwargs or {}
+ # In execute() we attempt to fetch this aws connection to check for extras. If the user didn't
+ # actually provide a connection note that, because we don't want to let the exception bubble up in
+ # that case (since we're silently injecting a connection on their behalf).
+ self._aws_conn_id: str | None
+ if isinstance(aws_conn_id, ArgNotSet):
+ self.conn_set = False
+ self._aws_conn_id = "aws_default"
+ else:
+ self.conn_set = True
+ self._aws_conn_id = aws_conn_id

  if self.redshift_data_api_kwargs:
  for arg in ["sql", "parameters"]:
@@ -149,14 +160,19 @@
  else:
  redshift_sql_hook = RedshiftSQLHook(redshift_conn_id=self.redshift_conn_id)

- conn = S3Hook.get_connection(conn_id=self.aws_conn_id) if self.aws_conn_id else None
+ conn = (
+ S3Hook.get_connection(conn_id=self._aws_conn_id)
+ # Only fetch the connection if it was set by the user and it is not None
+ if self.conn_set and self._aws_conn_id
+ else None
+ )
  region_info = ""
  if conn and conn.extra_dejson.get("region", False):
  region_info = f"region '{conn.extra_dejson['region']}'"
  if conn and conn.extra_dejson.get("role_arn", False):
  credentials_block = f"aws_iam_role={conn.extra_dejson['role_arn']}"
  else:
- s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
+ s3_hook = S3Hook(aws_conn_id=self._aws_conn_id, verify=self.verify)
  credentials = s3_hook.get_credentials()
  credentials_block = build_credentials_block(credentials)


airflow/providers/amazon/aws/transfers/sql_to_s3.py
@@ -223,7 +223,7 @@ class SqlToS3Operator(BaseOperator):
  return
  for group_label in (grouped_df := df.groupby(**self.groupby_kwargs)).groups:
  yield (
- cast(str, group_label),
+ cast("str", group_label),
  grouped_df.get_group(group_label)
  .drop(random_column_name, axis=1, errors="ignore")
  .reset_index(drop=True),

airflow/providers/amazon/aws/triggers/README.md
@@ -65,10 +65,10 @@ To call the asynchronous `wait` function, first create a hook for the particular
  self.redshift_hook = RedshiftHook(aws_conn_id=self.aws_conn_id)
  ```

- With this hook, we can use the async_conn property to get access to the aiobotocore client:
+ With this hook, we can use the asynchronous get_async_conn method to get access to the aiobotocore client:

  ```python
- async with self.redshift_hook.async_conn as client:
+ async with await self.redshift_hook.get_async_conn() as client:
  await client.get_waiter("cluster_available").wait(
  ClusterIdentifier=self.cluster_identifier,
  WaiterConfig={
@@ -81,7 +81,7 @@ async with self.redshift_hook.async_conn as client:
  In this case, we are using the built-in cluster_available waiter. If we wanted to use a custom waiter, we would change the code slightly to use the `get_waiter` function from the hook, rather than the aiobotocore client:

  ```python
- async with self.redshift_hook.async_conn as client:
+ async with await self.redshift_hook.get_async_conn() as client:
  waiter = self.redshift_hook.get_waiter("cluster_paused", deferrable=True, client=client)
  await waiter.wait(
  ClusterIdentifier=self.cluster_identifier,
@@ -131,7 +131,7 @@ For more information about writing custom waiter, see the [README.md](https://gi
  In some cases, a built-in or custom waiter may not be able to solve the problem. In such cases, the asynchronous method used to poll the boto3 API would need to be defined in the hook of the service being used. This method is essentially the same as the synchronous version of the method, except that it will use the aiobotocore client, and will be awaited. For the Redshift example, the async `describe_clusters` method would look as follows:

  ```python
- async with self.async_conn as client:
+ async with await self.get_async_conn() as client:
  response = client.describe_clusters(ClusterIdentifier=self.cluster_identifier)
  ```

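
Following that README recipe end to end, a hook-level async polling method built on `get_async_conn()` might look like the sketch below (the subclass, method name, and the `await` on the aiobotocore call are assumptions for illustration, not quoted from the provider):

```python
from airflow.providers.amazon.aws.hooks.redshift_cluster import RedshiftHook


class ExampleRedshiftHook(RedshiftHook):
    """Illustrative only: custom async polling helper."""

    async def describe_clusters_async(self, cluster_identifier: str) -> dict:
        # get_async_conn() hands back an aiobotocore client context manager;
        # client calls on it are coroutines and are awaited.
        async with await self.get_async_conn() as client:
            return await client.describe_clusters(ClusterIdentifier=cluster_identifier)
```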

airflow/providers/amazon/aws/triggers/base.py
@@ -55,6 +55,8 @@ class AwsBaseWaiterTrigger(BaseTrigger):

  :param waiter_delay: The amount of time in seconds to wait between attempts.
  :param waiter_max_attempts: The maximum number of attempts to be made.
+ :param waiter_config_overrides: A dict to update waiter's default configuration. Only specified keys will
+ be updated.
  :param aws_conn_id: The Airflow connection used for AWS credentials. To be used to build the hook.
  :param region_name: The AWS region where the resources to watch are. To be used to build the hook.
  :param verify: Whether or not to verify SSL certificates. To be used to build the hook.
@@ -77,6 +79,7 @@ class AwsBaseWaiterTrigger(BaseTrigger):
  return_value: Any,
  waiter_delay: int,
  waiter_max_attempts: int,
+ waiter_config_overrides: dict[str, Any] | None = None,
  aws_conn_id: str | None,
  region_name: str | None = None,
  verify: bool | str | None = None,
@@ -91,6 +94,7 @@ class AwsBaseWaiterTrigger(BaseTrigger):
  self.failure_message = failure_message
  self.status_message = status_message
  self.status_queries = status_queries
+ self.waiter_config_overrides = waiter_config_overrides

  self.return_key = return_key
  self.return_value = return_value
@@ -139,8 +143,13 @@ class AwsBaseWaiterTrigger(BaseTrigger):

  async def run(self) -> AsyncIterator[TriggerEvent]:
  hook = self.hook()
- async with hook.async_conn as client:
- waiter = hook.get_waiter(self.waiter_name, deferrable=True, client=client)
+ async with await hook.get_async_conn() as client:
+ waiter = hook.get_waiter(
+ self.waiter_name,
+ deferrable=True,
+ client=client,
+ config_overrides=self.waiter_config_overrides,
+ )
  await async_wait(
  waiter,
  self.waiter_delay,

airflow/providers/amazon/aws/triggers/ecs.py
@@ -167,8 +167,12 @@ class TaskDoneTrigger(BaseTrigger):

  async def run(self) -> AsyncIterator[TriggerEvent]:
  async with (
- EcsHook(aws_conn_id=self.aws_conn_id, region_name=self.region).async_conn as ecs_client,
- AwsLogsHook(aws_conn_id=self.aws_conn_id, region_name=self.region).async_conn as logs_client,
+ await EcsHook(
+ aws_conn_id=self.aws_conn_id, region_name=self.region
+ ).get_async_conn() as ecs_client,
+ await AwsLogsHook(
+ aws_conn_id=self.aws_conn_id, region_name=self.region
+ ).get_async_conn() as logs_client,
  ):
  waiter = ecs_client.get_waiter("tasks_stopped")
  logs_token = None

airflow/providers/amazon/aws/triggers/eks.py
@@ -70,7 +70,7 @@ class EksCreateClusterTrigger(AwsBaseWaiterTrigger):
  return EksHook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)

  async def run(self):
- async with self.hook().async_conn as client:
+ async with await self.hook().get_async_conn() as client:
  waiter = client.get_waiter(self.waiter_name)
  try:
  await async_wait(
@@ -140,7 +140,7 @@ class EksDeleteClusterTrigger(AwsBaseWaiterTrigger):
  return EksHook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)

  async def run(self):
- async with self.hook().async_conn as client:
+ async with await self.hook().get_async_conn() as client:
  waiter = client.get_waiter("cluster_deleted")
  if self.force_delete_compute:
  await self.delete_any_nodegroups(client=client)

airflow/providers/amazon/aws/triggers/glue.py
@@ -157,7 +157,7 @@ class GlueCatalogPartitionTrigger(BaseTrigger):
  return bool(partitions)

  async def run(self) -> AsyncIterator[TriggerEvent]:
- async with self.hook.async_conn as client:
+ async with await self.hook.get_async_conn() as client:
  while True:
  result = await self.poke(client=client)
  if result:

airflow/providers/amazon/aws/triggers/mwaa.py
@@ -0,0 +1,128 @@
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ # KIND, either express or implied. See the License for the
+ # specific language governing permissions and limitations
+ # under the License.
+
+ from __future__ import annotations
+
+ from collections.abc import Collection
+ from typing import TYPE_CHECKING
+
+ from airflow.providers.amazon.aws.hooks.mwaa import MwaaHook
+ from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
+ from airflow.utils.state import DagRunState
+
+ if TYPE_CHECKING:
+ from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
+
+
+ class MwaaDagRunCompletedTrigger(AwsBaseWaiterTrigger):
+ """
+ Trigger when an MWAA Dag Run is complete.
+
+ :param external_env_name: The external MWAA environment name that contains the DAG Run you want to wait for
+ (templated)
+ :param external_dag_id: The DAG ID in the external MWAA environment that contains the DAG Run you want to wait for
+ (templated)
+ :param external_dag_run_id: The DAG Run ID in the external MWAA environment that you want to wait for (templated)
+ :param success_states: Collection of DAG Run states that would make this task marked as successful, default is
+ ``{airflow.utils.state.DagRunState.SUCCESS}`` (templated)
+ :param failure_states: Collection of DAG Run states that would make this task marked as failed and raise an
+ AirflowException, default is ``{airflow.utils.state.DagRunState.FAILED}`` (templated)
+ :param waiter_delay: The amount of time in seconds to wait between attempts. (default: 60)
+ :param waiter_max_attempts: The maximum number of attempts to be made. (default: 720)
+ :param aws_conn_id: The Airflow connection used for AWS credentials.
+ """
+
+ def __init__(
+ self,
+ *,
+ external_env_name: str,
+ external_dag_id: str,
+ external_dag_run_id: str,
+ success_states: Collection[str] | None = None,
+ failure_states: Collection[str] | None = None,
+ waiter_delay: int = 60,
+ waiter_max_attempts: int = 720,
+ aws_conn_id: str | None = None,
+ ) -> None:
+ self.success_states = set(success_states) if success_states else {DagRunState.SUCCESS.value}
+ self.failure_states = set(failure_states) if failure_states else {DagRunState.FAILED.value}
+
+ if len(self.success_states & self.failure_states):
+ raise ValueError("success_states and failure_states must not have any values in common")
+
+ in_progress_states = {s.value for s in DagRunState} - self.success_states - self.failure_states
+
+ super().__init__(
+ serialized_fields={
+ "external_env_name": external_env_name,
+ "external_dag_id": external_dag_id,
+ "external_dag_run_id": external_dag_run_id,
+ "success_states": success_states,
+ "failure_states": failure_states,
+ },
+ waiter_name="mwaa_dag_run_complete",
+ waiter_args={
+ "Name": external_env_name,
+ "Path": f"/dags/{external_dag_id}/dagRuns/{external_dag_run_id}",
+ "Method": "GET",
+ },
+ failure_message=f"The DAG run {external_dag_run_id} of DAG {external_dag_id} in MWAA environment {external_env_name} failed with state",
+ status_message="State of DAG run",
+ status_queries=["RestApiResponse.state"],
+ return_key="dag_run_id",
+ return_value=external_dag_run_id,
+ waiter_delay=waiter_delay,
+ waiter_max_attempts=waiter_max_attempts,
+ aws_conn_id=aws_conn_id,
+ waiter_config_overrides={
+ "acceptors": _build_waiter_acceptors(
+ success_states=self.success_states,
+ failure_states=self.failure_states,
+ in_progress_states=in_progress_states,
+ )
+ },
+ )
+
+ def hook(self) -> AwsGenericHook:
+ return MwaaHook(
+ aws_conn_id=self.aws_conn_id,
+ region_name=self.region_name,
+ verify=self.verify,
+ config=self.botocore_config,
+ )
+
+
+ def _build_waiter_acceptors(
+ success_states: set[str], failure_states: set[str], in_progress_states: set[str]
+ ) -> list:
+ acceptors = []
+ for state_set, state_waiter_category in (
+ (success_states, "success"),
+ (failure_states, "failure"),
+ (in_progress_states, "retry"),
+ ):
+ for dag_run_state in state_set:
+ acceptors.append(
+ {
+ "matcher": "path",
+ "argument": "RestApiResponse.state",
+ "expected": dag_run_state,
+ "state": state_waiter_category,
+ }
+ )
+
+ return acceptors
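
A hedged sketch of how a deferrable task could hand off to this trigger (the wrapper class, timeout, and environment/DAG identifiers are placeholders; the new sensors/mwaa.py in this release is the intended consumer):

```python
from datetime import timedelta

from airflow.providers.amazon.aws.triggers.mwaa import MwaaDagRunCompletedTrigger
from airflow.sensors.base import BaseSensorOperator


class ExampleMwaaDagRunWait(BaseSensorOperator):
    """Illustrative only: defers to the new MWAA trigger and resumes when the run finishes."""

    def execute(self, context):
        self.defer(
            timeout=timedelta(hours=12),
            trigger=MwaaDagRunCompletedTrigger(
                external_env_name="my-mwaa-env",            # placeholder environment
                external_dag_id="downstream_dag",           # placeholder DAG id
                external_dag_run_id="manual__2025-01-01",   # placeholder run id
            ),
            method_name="execute_complete",
        )

    def execute_complete(self, context, event=None):
        self.log.info("MWAA dag run finished: %s", event)
```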