apache-airflow-providers-amazon 9.4.0rc1__py3-none-any.whl → 9.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/amazon/__init__.py +1 -1
- airflow/providers/amazon/aws/auth_manager/avp/entities.py +3 -1
- airflow/providers/amazon/aws/auth_manager/avp/facade.py +1 -1
- airflow/providers/amazon/aws/auth_manager/aws_auth_manager.py +80 -110
- airflow/providers/amazon/aws/auth_manager/router/login.py +11 -4
- airflow/providers/amazon/aws/auth_manager/user.py +7 -4
- airflow/providers/amazon/aws/executors/ecs/ecs_executor.py +1 -1
- airflow/providers/amazon/aws/hooks/appflow.py +5 -15
- airflow/providers/amazon/aws/hooks/athena_sql.py +2 -2
- airflow/providers/amazon/aws/hooks/base_aws.py +34 -1
- airflow/providers/amazon/aws/hooks/batch_client.py +1 -2
- airflow/providers/amazon/aws/hooks/batch_waiters.py +11 -3
- airflow/providers/amazon/aws/hooks/dms.py +3 -1
- airflow/providers/amazon/aws/hooks/ec2.py +1 -1
- airflow/providers/amazon/aws/hooks/eks.py +3 -6
- airflow/providers/amazon/aws/hooks/glue.py +6 -2
- airflow/providers/amazon/aws/hooks/logs.py +2 -2
- airflow/providers/amazon/aws/hooks/mwaa.py +79 -15
- airflow/providers/amazon/aws/hooks/redshift_cluster.py +10 -10
- airflow/providers/amazon/aws/hooks/redshift_data.py +3 -4
- airflow/providers/amazon/aws/hooks/s3.py +3 -1
- airflow/providers/amazon/aws/hooks/sagemaker.py +2 -2
- airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py +188 -0
- airflow/providers/amazon/aws/links/athena.py +1 -2
- airflow/providers/amazon/aws/links/base_aws.py +8 -1
- airflow/providers/amazon/aws/links/sagemaker_unified_studio.py +27 -0
- airflow/providers/amazon/aws/log/cloudwatch_task_handler.py +174 -54
- airflow/providers/amazon/aws/log/s3_task_handler.py +136 -84
- airflow/providers/amazon/aws/notifications/chime.py +1 -2
- airflow/providers/amazon/aws/notifications/sns.py +1 -1
- airflow/providers/amazon/aws/notifications/sqs.py +1 -1
- airflow/providers/amazon/aws/operators/ec2.py +91 -83
- airflow/providers/amazon/aws/operators/eks.py +3 -3
- airflow/providers/amazon/aws/operators/mwaa.py +73 -2
- airflow/providers/amazon/aws/operators/redshift_cluster.py +10 -3
- airflow/providers/amazon/aws/operators/s3.py +147 -157
- airflow/providers/amazon/aws/operators/sagemaker.py +4 -7
- airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py +155 -0
- airflow/providers/amazon/aws/sensors/ec2.py +5 -12
- airflow/providers/amazon/aws/sensors/emr.py +1 -1
- airflow/providers/amazon/aws/sensors/glacier.py +1 -1
- airflow/providers/amazon/aws/sensors/mwaa.py +161 -0
- airflow/providers/amazon/aws/sensors/rds.py +10 -5
- airflow/providers/amazon/aws/sensors/s3.py +32 -43
- airflow/providers/amazon/aws/sensors/sagemaker_unified_studio.py +73 -0
- airflow/providers/amazon/aws/sensors/step_function.py +2 -1
- airflow/providers/amazon/aws/transfers/mongo_to_s3.py +2 -2
- airflow/providers/amazon/aws/transfers/redshift_to_s3.py +19 -4
- airflow/providers/amazon/aws/transfers/s3_to_redshift.py +19 -3
- airflow/providers/amazon/aws/transfers/sql_to_s3.py +1 -1
- airflow/providers/amazon/aws/triggers/README.md +4 -4
- airflow/providers/amazon/aws/triggers/base.py +11 -2
- airflow/providers/amazon/aws/triggers/ecs.py +6 -2
- airflow/providers/amazon/aws/triggers/eks.py +2 -2
- airflow/providers/amazon/aws/triggers/glue.py +1 -1
- airflow/providers/amazon/aws/triggers/mwaa.py +128 -0
- airflow/providers/amazon/aws/triggers/s3.py +31 -6
- airflow/providers/amazon/aws/triggers/sagemaker.py +2 -2
- airflow/providers/amazon/aws/triggers/sagemaker_unified_studio.py +66 -0
- airflow/providers/amazon/aws/triggers/sqs.py +11 -3
- airflow/providers/amazon/aws/{auth_manager/security_manager/__init__.py → utils/sagemaker_unified_studio.py} +12 -0
- airflow/providers/amazon/aws/utils/waiter_with_logging.py +4 -3
- airflow/providers/amazon/aws/waiters/mwaa.json +36 -0
- airflow/providers/amazon/get_provider_info.py +46 -5
- {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0.dist-info}/METADATA +40 -33
- {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0.dist-info}/RECORD +68 -61
- {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0.dist-info}/WHEEL +1 -1
- airflow/providers/amazon/aws/auth_manager/security_manager/aws_security_manager_override.py +0 -40
- {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0.dist-info}/entry_points.txt +0 -0
@@ -23,7 +23,6 @@ import os
|
|
23
23
|
import re
|
24
24
|
from collections.abc import Sequence
|
25
25
|
from datetime import datetime, timedelta
|
26
|
-
from functools import cached_property
|
27
26
|
from typing import TYPE_CHECKING, Any, Callable, cast
|
28
27
|
|
29
28
|
from airflow.configuration import conf
|
@@ -34,11 +33,13 @@ if TYPE_CHECKING:
|
|
34
33
|
|
35
34
|
from airflow.exceptions import AirflowException
|
36
35
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
36
|
+
from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
|
37
37
|
from airflow.providers.amazon.aws.triggers.s3 import S3KeysUnchangedTrigger, S3KeyTrigger
|
38
|
-
from airflow.
|
38
|
+
from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
|
39
|
+
from airflow.sensors.base import poke_mode_only
|
39
40
|
|
40
41
|
|
41
|
-
class S3KeySensor(
|
42
|
+
class S3KeySensor(AwsBaseSensor[S3Hook]):
|
42
43
|
"""
|
43
44
|
Waits for one or multiple keys (a file-like instance on S3) to be present in a S3 bucket.
|
44
45
|
|
@@ -65,17 +66,6 @@ class S3KeySensor(BaseSensorOperator):
|
|
65
66
|
|
66
67
|
def check_fn(files: List, **kwargs) -> bool:
|
67
68
|
return any(f.get('Size', 0) > 1048576 for f in files)
|
68
|
-
:param aws_conn_id: a reference to the s3 connection
|
69
|
-
:param verify: Whether to verify SSL certificates for S3 connection.
|
70
|
-
By default, SSL certificates are verified.
|
71
|
-
You can provide the following values:
|
72
|
-
|
73
|
-
- ``False``: do not validate SSL certificates. SSL will still be used
|
74
|
-
(unless use_ssl is False), but SSL certificates will not be
|
75
|
-
verified.
|
76
|
-
- ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
|
77
|
-
You can specify this argument if you want to use a different
|
78
|
-
CA cert bundle than the one used by botocore.
|
79
69
|
:param deferrable: Run operator in the deferrable mode
|
80
70
|
:param use_regex: whether to use regex to check bucket
|
81
71
|
:param metadata_keys: List of head_object attributes to gather and send to ``check_fn``.
|
@@ -83,9 +73,18 @@ class S3KeySensor(BaseSensorOperator):
|
|
83
73
|
all available attributes.
|
84
74
|
Default value: "Size".
|
85
75
|
If the requested attribute is not found, the key is still included and the value is None.
|
76
|
+
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
77
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
78
|
+
running Airflow in a distributed manner and aws_conn_id is None or
|
79
|
+
empty, then default boto3 configuration would be used (and must be
|
80
|
+
maintained on each worker node).
|
81
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
82
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
83
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
86
84
|
"""
|
87
85
|
|
88
|
-
template_fields: Sequence[str] = ("bucket_key", "bucket_name")
|
86
|
+
template_fields: Sequence[str] = aws_template_fields("bucket_key", "bucket_name")
|
87
|
+
aws_hook_class = S3Hook
|
89
88
|
|
90
89
|
def __init__(
|
91
90
|
self,
|
@@ -94,7 +93,6 @@ class S3KeySensor(BaseSensorOperator):
|
|
94
93
|
bucket_name: str | None = None,
|
95
94
|
wildcard_match: bool = False,
|
96
95
|
check_fn: Callable[..., bool] | None = None,
|
97
|
-
aws_conn_id: str | None = "aws_default",
|
98
96
|
verify: str | bool | None = None,
|
99
97
|
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
100
98
|
use_regex: bool = False,
|
@@ -106,14 +104,13 @@ class S3KeySensor(BaseSensorOperator):
|
|
106
104
|
self.bucket_key = bucket_key
|
107
105
|
self.wildcard_match = wildcard_match
|
108
106
|
self.check_fn = check_fn
|
109
|
-
self.aws_conn_id = aws_conn_id
|
110
107
|
self.verify = verify
|
111
108
|
self.deferrable = deferrable
|
112
109
|
self.use_regex = use_regex
|
113
110
|
self.metadata_keys = metadata_keys if metadata_keys else ["Size"]
|
114
111
|
|
115
112
|
def _check_key(self, key, context: Context):
|
116
|
-
bucket_name, key =
|
113
|
+
bucket_name, key = self.hook.get_s3_bucket_key(self.bucket_name, key, "bucket_name", "bucket_key")
|
117
114
|
self.log.info("Poking for key : s3://%s/%s", bucket_name, key)
|
118
115
|
|
119
116
|
"""
|
@@ -195,11 +192,13 @@ class S3KeySensor(BaseSensorOperator):
|
|
195
192
|
self.defer(
|
196
193
|
timeout=timedelta(seconds=self.timeout),
|
197
194
|
trigger=S3KeyTrigger(
|
198
|
-
bucket_name=cast(str, self.bucket_name),
|
195
|
+
bucket_name=cast("str", self.bucket_name),
|
199
196
|
bucket_key=self.bucket_key,
|
200
197
|
wildcard_match=self.wildcard_match,
|
201
198
|
aws_conn_id=self.aws_conn_id,
|
199
|
+
region_name=self.region_name,
|
202
200
|
verify=self.verify,
|
201
|
+
botocore_config=self.botocore_config,
|
203
202
|
poke_interval=self.poke_interval,
|
204
203
|
should_check_fn=bool(self.check_fn),
|
205
204
|
use_regex=self.use_regex,
|
@@ -220,13 +219,9 @@ class S3KeySensor(BaseSensorOperator):
|
|
220
219
|
elif event["status"] == "error":
|
221
220
|
raise AirflowException(event["message"])
|
222
221
|
|
223
|
-
@cached_property
|
224
|
-
def hook(self) -> S3Hook:
|
225
|
-
return S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
|
226
|
-
|
227
222
|
|
228
223
|
@poke_mode_only
|
229
|
-
class S3KeysUnchangedSensor(
|
224
|
+
class S3KeysUnchangedSensor(AwsBaseSensor[S3Hook]):
|
230
225
|
"""
|
231
226
|
Return True if inactivity_period has passed with no increase in the number of objects matching prefix.
|
232
227
|
|
@@ -239,17 +234,7 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
|
|
239
234
|
|
240
235
|
:param bucket_name: Name of the S3 bucket
|
241
236
|
:param prefix: The prefix being waited on. Relative path from bucket root level.
|
242
|
-
|
243
|
-
:param verify: Whether or not to verify SSL certificates for S3 connection.
|
244
|
-
By default SSL certificates are verified.
|
245
|
-
You can provide the following values:
|
246
|
-
|
247
|
-
- ``False``: do not validate SSL certificates. SSL will still be used
|
248
|
-
(unless use_ssl is False), but SSL certificates will not be
|
249
|
-
verified.
|
250
|
-
- ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
|
251
|
-
You can specify this argument if you want to use a different
|
252
|
-
CA cert bundle than the one used by botocore.
|
237
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
253
238
|
:param inactivity_period: The total seconds of inactivity to designate
|
254
239
|
keys unchanged. Note, this mechanism is not real time and
|
255
240
|
this operator may not return until a poke_interval after this period
|
@@ -261,16 +246,24 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
|
|
261
246
|
between pokes valid behavior. If true a warning message will be logged
|
262
247
|
when this happens. If false an error will be raised.
|
263
248
|
:param deferrable: Run sensor in the deferrable mode
|
249
|
+
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
250
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
251
|
+
running Airflow in a distributed manner and aws_conn_id is None or
|
252
|
+
empty, then default boto3 configuration would be used (and must be
|
253
|
+
maintained on each worker node).
|
254
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
255
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
256
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
264
257
|
"""
|
265
258
|
|
266
|
-
template_fields: Sequence[str] = ("bucket_name", "prefix")
|
259
|
+
template_fields: Sequence[str] = aws_template_fields("bucket_name", "prefix")
|
260
|
+
aws_hook_class = S3Hook
|
267
261
|
|
268
262
|
def __init__(
|
269
263
|
self,
|
270
264
|
*,
|
271
265
|
bucket_name: str,
|
272
266
|
prefix: str,
|
273
|
-
aws_conn_id: str | None = "aws_default",
|
274
267
|
verify: bool | str | None = None,
|
275
268
|
inactivity_period: float = 60 * 60,
|
276
269
|
min_objects: int = 1,
|
@@ -291,15 +284,9 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
|
|
291
284
|
self.inactivity_seconds = 0
|
292
285
|
self.allow_delete = allow_delete
|
293
286
|
self.deferrable = deferrable
|
294
|
-
self.aws_conn_id = aws_conn_id
|
295
287
|
self.verify = verify
|
296
288
|
self.last_activity_time: datetime | None = None
|
297
289
|
|
298
|
-
@cached_property
|
299
|
-
def hook(self):
|
300
|
-
"""Returns S3Hook."""
|
301
|
-
return S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
|
302
|
-
|
303
290
|
def is_keys_unchanged(self, current_objects: set[str]) -> bool:
|
304
291
|
"""
|
305
292
|
Check for new objects after the inactivity_period and update the sensor state accordingly.
|
@@ -382,7 +369,9 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
|
|
382
369
|
inactivity_seconds=self.inactivity_seconds,
|
383
370
|
allow_delete=self.allow_delete,
|
384
371
|
aws_conn_id=self.aws_conn_id,
|
372
|
+
region_name=self.region_name,
|
385
373
|
verify=self.verify,
|
374
|
+
botocore_config=self.botocore_config,
|
386
375
|
last_activity_time=self.last_activity_time,
|
387
376
|
),
|
388
377
|
method_name="execute_complete",
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
"""This module contains the Amazon SageMaker Unified Studio Notebook sensor."""
|
19
|
+
|
20
|
+
from __future__ import annotations
|
21
|
+
|
22
|
+
from typing import TYPE_CHECKING
|
23
|
+
|
24
|
+
from airflow.exceptions import AirflowException
|
25
|
+
from airflow.providers.amazon.aws.hooks.sagemaker_unified_studio import (
|
26
|
+
SageMakerNotebookHook,
|
27
|
+
)
|
28
|
+
from airflow.sensors.base import BaseSensorOperator
|
29
|
+
|
30
|
+
if TYPE_CHECKING:
|
31
|
+
from airflow.utils.context import Context
|
32
|
+
|
33
|
+
|
34
|
+
class SageMakerNotebookSensor(BaseSensorOperator):
|
35
|
+
"""
|
36
|
+
Waits for a Sagemaker Workflows Notebook execution to reach any of the status below.
|
37
|
+
|
38
|
+
'FAILED', 'STOPPED', 'COMPLETED'
|
39
|
+
|
40
|
+
:param execution_id: The Sagemaker Workflows Notebook running execution identifier
|
41
|
+
:param execution_name: The Sagemaker Workflows Notebook unique execution name
|
42
|
+
"""
|
43
|
+
|
44
|
+
def __init__(self, *, execution_id: str, execution_name: str, **kwargs):
|
45
|
+
super().__init__(**kwargs)
|
46
|
+
self.execution_id = execution_id
|
47
|
+
self.execution_name = execution_name
|
48
|
+
self.success_state = ["COMPLETED"]
|
49
|
+
self.in_progress_states = ["PENDING", "RUNNING"]
|
50
|
+
|
51
|
+
def hook(self):
|
52
|
+
return SageMakerNotebookHook(execution_name=self.execution_name)
|
53
|
+
|
54
|
+
# override from base sensor
|
55
|
+
def poke(self, context=None):
|
56
|
+
status = self.hook().get_execution_status(execution_id=self.execution_id)
|
57
|
+
|
58
|
+
if status in self.success_state:
|
59
|
+
log_info_message = f"Exiting Execution {self.execution_id} State: {status}"
|
60
|
+
self.log.info(log_info_message)
|
61
|
+
return True
|
62
|
+
elif status in self.in_progress_states:
|
63
|
+
return False
|
64
|
+
else:
|
65
|
+
error_message = f"Exiting Execution {self.execution_id} State: {status}"
|
66
|
+
self.log.info(error_message)
|
67
|
+
raise AirflowException(error_message)
|
68
|
+
|
69
|
+
def execute(self, context: Context):
|
70
|
+
# This will invoke poke method in the base sensor
|
71
|
+
log_info_message = f"Polling Sagemaker Workflows Artifact execution: {self.execution_name} and execution id: {self.execution_id}"
|
72
|
+
self.log.info(log_info_message)
|
73
|
+
super().execute(context=context)
|
@@ -103,7 +103,7 @@ class MongoToS3Operator(BaseOperator):
|
|
103
103
|
if self.is_pipeline:
|
104
104
|
results: CommandCursor[Any] | Cursor = MongoHook(self.mongo_conn_id).aggregate(
|
105
105
|
mongo_collection=self.mongo_collection,
|
106
|
-
aggregate_query=cast(list, self.mongo_query),
|
106
|
+
aggregate_query=cast("list", self.mongo_query),
|
107
107
|
mongo_db=self.mongo_db,
|
108
108
|
allowDiskUse=self.allow_disk_use,
|
109
109
|
)
|
@@ -111,7 +111,7 @@ class MongoToS3Operator(BaseOperator):
|
|
111
111
|
else:
|
112
112
|
results = MongoHook(self.mongo_conn_id).find(
|
113
113
|
mongo_collection=self.mongo_collection,
|
114
|
-
query=cast(dict, self.mongo_query),
|
114
|
+
query=cast("dict", self.mongo_query),
|
115
115
|
projection=self.mongo_projection,
|
116
116
|
mongo_db=self.mongo_db,
|
117
117
|
find_one=False,
|
@@ -29,6 +29,7 @@ from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook
|
|
29
29
|
from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook
|
30
30
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
31
31
|
from airflow.providers.amazon.aws.utils.redshift import build_credentials_block
|
32
|
+
from airflow.utils.types import NOTSET, ArgNotSet
|
32
33
|
|
33
34
|
if TYPE_CHECKING:
|
34
35
|
from airflow.utils.context import Context
|
@@ -102,7 +103,7 @@ class RedshiftToS3Operator(BaseOperator):
|
|
102
103
|
table: str | None = None,
|
103
104
|
select_query: str | None = None,
|
104
105
|
redshift_conn_id: str = "redshift_default",
|
105
|
-
aws_conn_id: str | None =
|
106
|
+
aws_conn_id: str | None | ArgNotSet = NOTSET,
|
106
107
|
verify: bool | str | None = None,
|
107
108
|
unload_options: list | None = None,
|
108
109
|
autocommit: bool = False,
|
@@ -118,7 +119,6 @@ class RedshiftToS3Operator(BaseOperator):
|
|
118
119
|
self.schema = schema
|
119
120
|
self.table = table
|
120
121
|
self.redshift_conn_id = redshift_conn_id
|
121
|
-
self.aws_conn_id = aws_conn_id
|
122
122
|
self.verify = verify
|
123
123
|
self.unload_options = unload_options or []
|
124
124
|
self.autocommit = autocommit
|
@@ -127,6 +127,16 @@ class RedshiftToS3Operator(BaseOperator):
|
|
127
127
|
self.table_as_file_name = table_as_file_name
|
128
128
|
self.redshift_data_api_kwargs = redshift_data_api_kwargs or {}
|
129
129
|
self.select_query = select_query
|
130
|
+
# In execute() we attempt to fetch this aws connection to check for extras. If the user didn't
|
131
|
+
# actually provide a connection note that, because we don't want to let the exception bubble up in
|
132
|
+
# that case (since we're silently injecting a connection on their behalf).
|
133
|
+
self._aws_conn_id: str | None
|
134
|
+
if isinstance(aws_conn_id, ArgNotSet):
|
135
|
+
self.conn_set = False
|
136
|
+
self._aws_conn_id = "aws_default"
|
137
|
+
else:
|
138
|
+
self.conn_set = True
|
139
|
+
self._aws_conn_id = aws_conn_id
|
130
140
|
|
131
141
|
def _build_unload_query(
|
132
142
|
self, credentials_block: str, select_query: str, s3_key: str, unload_options: str
|
@@ -176,11 +186,16 @@ class RedshiftToS3Operator(BaseOperator):
|
|
176
186
|
raise AirflowException(f"Cannot include param '{arg}' in Redshift Data API kwargs")
|
177
187
|
else:
|
178
188
|
redshift_sql_hook = RedshiftSQLHook(redshift_conn_id=self.redshift_conn_id)
|
179
|
-
conn =
|
189
|
+
conn = (
|
190
|
+
S3Hook.get_connection(conn_id=self._aws_conn_id)
|
191
|
+
# Only fetch the connection if it was set by the user and it is not None
|
192
|
+
if self.conn_set and self._aws_conn_id
|
193
|
+
else None
|
194
|
+
)
|
180
195
|
if conn and conn.extra_dejson.get("role_arn", False):
|
181
196
|
credentials_block = f"aws_iam_role={conn.extra_dejson['role_arn']}"
|
182
197
|
else:
|
183
|
-
s3_hook = S3Hook(aws_conn_id=self.
|
198
|
+
s3_hook = S3Hook(aws_conn_id=self._aws_conn_id, verify=self.verify)
|
184
199
|
credentials = s3_hook.get_credentials()
|
185
200
|
credentials_block = build_credentials_block(credentials)
|
186
201
|
|
@@ -25,6 +25,7 @@ from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook
|
|
25
25
|
from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook
|
26
26
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
27
27
|
from airflow.providers.amazon.aws.utils.redshift import build_credentials_block
|
28
|
+
from airflow.utils.types import NOTSET, ArgNotSet
|
28
29
|
|
29
30
|
if TYPE_CHECKING:
|
30
31
|
from airflow.utils.context import Context
|
@@ -93,7 +94,7 @@ class S3ToRedshiftOperator(BaseOperator):
|
|
93
94
|
s3_key: str,
|
94
95
|
schema: str | None = None,
|
95
96
|
redshift_conn_id: str = "redshift_default",
|
96
|
-
aws_conn_id: str | None =
|
97
|
+
aws_conn_id: str | None | ArgNotSet = NOTSET,
|
97
98
|
verify: bool | str | None = None,
|
98
99
|
column_list: list[str] | None = None,
|
99
100
|
copy_options: list | None = None,
|
@@ -117,6 +118,16 @@ class S3ToRedshiftOperator(BaseOperator):
|
|
117
118
|
self.method = method
|
118
119
|
self.upsert_keys = upsert_keys
|
119
120
|
self.redshift_data_api_kwargs = redshift_data_api_kwargs or {}
|
121
|
+
# In execute() we attempt to fetch this aws connection to check for extras. If the user didn't
|
122
|
+
# actually provide a connection note that, because we don't want to let the exception bubble up in
|
123
|
+
# that case (since we're silently injecting a connection on their behalf).
|
124
|
+
self._aws_conn_id: str | None
|
125
|
+
if isinstance(aws_conn_id, ArgNotSet):
|
126
|
+
self.conn_set = False
|
127
|
+
self._aws_conn_id = "aws_default"
|
128
|
+
else:
|
129
|
+
self.conn_set = True
|
130
|
+
self._aws_conn_id = aws_conn_id
|
120
131
|
|
121
132
|
if self.redshift_data_api_kwargs:
|
122
133
|
for arg in ["sql", "parameters"]:
|
@@ -149,14 +160,19 @@ class S3ToRedshiftOperator(BaseOperator):
|
|
149
160
|
else:
|
150
161
|
redshift_sql_hook = RedshiftSQLHook(redshift_conn_id=self.redshift_conn_id)
|
151
162
|
|
152
|
-
conn =
|
163
|
+
conn = (
|
164
|
+
S3Hook.get_connection(conn_id=self._aws_conn_id)
|
165
|
+
# Only fetch the connection if it was set by the user and it is not None
|
166
|
+
if self.conn_set and self._aws_conn_id
|
167
|
+
else None
|
168
|
+
)
|
153
169
|
region_info = ""
|
154
170
|
if conn and conn.extra_dejson.get("region", False):
|
155
171
|
region_info = f"region '{conn.extra_dejson['region']}'"
|
156
172
|
if conn and conn.extra_dejson.get("role_arn", False):
|
157
173
|
credentials_block = f"aws_iam_role={conn.extra_dejson['role_arn']}"
|
158
174
|
else:
|
159
|
-
s3_hook = S3Hook(aws_conn_id=self.
|
175
|
+
s3_hook = S3Hook(aws_conn_id=self._aws_conn_id, verify=self.verify)
|
160
176
|
credentials = s3_hook.get_credentials()
|
161
177
|
credentials_block = build_credentials_block(credentials)
|
162
178
|
|
@@ -223,7 +223,7 @@ class SqlToS3Operator(BaseOperator):
|
|
223
223
|
return
|
224
224
|
for group_label in (grouped_df := df.groupby(**self.groupby_kwargs)).groups:
|
225
225
|
yield (
|
226
|
-
cast(str, group_label),
|
226
|
+
cast("str", group_label),
|
227
227
|
grouped_df.get_group(group_label)
|
228
228
|
.drop(random_column_name, axis=1, errors="ignore")
|
229
229
|
.reset_index(drop=True),
|
@@ -65,10 +65,10 @@ To call the asynchronous `wait` function, first create a hook for the particular
|
|
65
65
|
self.redshift_hook = RedshiftHook(aws_conn_id=self.aws_conn_id)
|
66
66
|
```
|
67
67
|
|
68
|
-
With this hook, we can use the
|
68
|
+
With this hook, we can use the asynchronous get_async_conn method to get access to the aiobotocore client:
|
69
69
|
|
70
70
|
```python
|
71
|
-
async with self.redshift_hook.
|
71
|
+
async with await self.redshift_hook.get_async_conn() as client:
|
72
72
|
await client.get_waiter("cluster_available").wait(
|
73
73
|
ClusterIdentifier=self.cluster_identifier,
|
74
74
|
WaiterConfig={
|
@@ -81,7 +81,7 @@ async with self.redshift_hook.async_conn as client:
|
|
81
81
|
In this case, we are using the built-in cluster_available waiter. If we wanted to use a custom waiter, we would change the code slightly to use the `get_waiter` function from the hook, rather than the aiobotocore client:
|
82
82
|
|
83
83
|
```python
|
84
|
-
async with self.redshift_hook.
|
84
|
+
async with await self.redshift_hook.get_async_conn() as client:
|
85
85
|
waiter = self.redshift_hook.get_waiter("cluster_paused", deferrable=True, client=client)
|
86
86
|
await waiter.wait(
|
87
87
|
ClusterIdentifier=self.cluster_identifier,
|
@@ -131,7 +131,7 @@ For more information about writing custom waiter, see the [README.md](https://gi
|
|
131
131
|
In some cases, a built-in or custom waiter may not be able to solve the problem. In such cases, the asynchronous method used to poll the boto3 API would need to be defined in the hook of the service being used. This method is essentially the same as the synchronous version of the method, except that it will use the aiobotocore client, and will be awaited. For the Redshift example, the async `describe_clusters` method would look as follows:
|
132
132
|
|
133
133
|
```python
|
134
|
-
async with self.
|
134
|
+
async with await self.get_async_conn() as client:
|
135
135
|
response = client.describe_clusters(ClusterIdentifier=self.cluster_identifier)
|
136
136
|
```
|
137
137
|
|
@@ -55,6 +55,8 @@ class AwsBaseWaiterTrigger(BaseTrigger):
|
|
55
55
|
|
56
56
|
:param waiter_delay: The amount of time in seconds to wait between attempts.
|
57
57
|
:param waiter_max_attempts: The maximum number of attempts to be made.
|
58
|
+
:param waiter_config_overrides: A dict to update waiter's default configuration. Only specified keys will
|
59
|
+
be updated.
|
58
60
|
:param aws_conn_id: The Airflow connection used for AWS credentials. To be used to build the hook.
|
59
61
|
:param region_name: The AWS region where the resources to watch are. To be used to build the hook.
|
60
62
|
:param verify: Whether or not to verify SSL certificates. To be used to build the hook.
|
@@ -77,6 +79,7 @@ class AwsBaseWaiterTrigger(BaseTrigger):
|
|
77
79
|
return_value: Any,
|
78
80
|
waiter_delay: int,
|
79
81
|
waiter_max_attempts: int,
|
82
|
+
waiter_config_overrides: dict[str, Any] | None = None,
|
80
83
|
aws_conn_id: str | None,
|
81
84
|
region_name: str | None = None,
|
82
85
|
verify: bool | str | None = None,
|
@@ -91,6 +94,7 @@ class AwsBaseWaiterTrigger(BaseTrigger):
|
|
91
94
|
self.failure_message = failure_message
|
92
95
|
self.status_message = status_message
|
93
96
|
self.status_queries = status_queries
|
97
|
+
self.waiter_config_overrides = waiter_config_overrides
|
94
98
|
|
95
99
|
self.return_key = return_key
|
96
100
|
self.return_value = return_value
|
@@ -139,8 +143,13 @@ class AwsBaseWaiterTrigger(BaseTrigger):
|
|
139
143
|
|
140
144
|
async def run(self) -> AsyncIterator[TriggerEvent]:
|
141
145
|
hook = self.hook()
|
142
|
-
async with hook.
|
143
|
-
waiter = hook.get_waiter(
|
146
|
+
async with await hook.get_async_conn() as client:
|
147
|
+
waiter = hook.get_waiter(
|
148
|
+
self.waiter_name,
|
149
|
+
deferrable=True,
|
150
|
+
client=client,
|
151
|
+
config_overrides=self.waiter_config_overrides,
|
152
|
+
)
|
144
153
|
await async_wait(
|
145
154
|
waiter,
|
146
155
|
self.waiter_delay,
|
@@ -167,8 +167,12 @@ class TaskDoneTrigger(BaseTrigger):
|
|
167
167
|
|
168
168
|
async def run(self) -> AsyncIterator[TriggerEvent]:
|
169
169
|
async with (
|
170
|
-
EcsHook(
|
171
|
-
|
170
|
+
await EcsHook(
|
171
|
+
aws_conn_id=self.aws_conn_id, region_name=self.region
|
172
|
+
).get_async_conn() as ecs_client,
|
173
|
+
await AwsLogsHook(
|
174
|
+
aws_conn_id=self.aws_conn_id, region_name=self.region
|
175
|
+
).get_async_conn() as logs_client,
|
172
176
|
):
|
173
177
|
waiter = ecs_client.get_waiter("tasks_stopped")
|
174
178
|
logs_token = None
|
@@ -70,7 +70,7 @@ class EksCreateClusterTrigger(AwsBaseWaiterTrigger):
|
|
70
70
|
return EksHook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)
|
71
71
|
|
72
72
|
async def run(self):
|
73
|
-
async with self.hook().
|
73
|
+
async with await self.hook().get_async_conn() as client:
|
74
74
|
waiter = client.get_waiter(self.waiter_name)
|
75
75
|
try:
|
76
76
|
await async_wait(
|
@@ -140,7 +140,7 @@ class EksDeleteClusterTrigger(AwsBaseWaiterTrigger):
|
|
140
140
|
return EksHook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)
|
141
141
|
|
142
142
|
async def run(self):
|
143
|
-
async with self.hook().
|
143
|
+
async with await self.hook().get_async_conn() as client:
|
144
144
|
waiter = client.get_waiter("cluster_deleted")
|
145
145
|
if self.force_delete_compute:
|
146
146
|
await self.delete_any_nodegroups(client=client)
|
@@ -157,7 +157,7 @@ class GlueCatalogPartitionTrigger(BaseTrigger):
|
|
157
157
|
return bool(partitions)
|
158
158
|
|
159
159
|
async def run(self) -> AsyncIterator[TriggerEvent]:
|
160
|
-
async with self.hook.
|
160
|
+
async with await self.hook.get_async_conn() as client:
|
161
161
|
while True:
|
162
162
|
result = await self.poke(client=client)
|
163
163
|
if result:
|
@@ -0,0 +1,128 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
from __future__ import annotations
|
19
|
+
|
20
|
+
from collections.abc import Collection
|
21
|
+
from typing import TYPE_CHECKING
|
22
|
+
|
23
|
+
from airflow.providers.amazon.aws.hooks.mwaa import MwaaHook
|
24
|
+
from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
|
25
|
+
from airflow.utils.state import DagRunState
|
26
|
+
|
27
|
+
if TYPE_CHECKING:
|
28
|
+
from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
|
29
|
+
|
30
|
+
|
31
|
+
class MwaaDagRunCompletedTrigger(AwsBaseWaiterTrigger):
    """
    Waiter-based trigger that fires when a DAG run in an external MWAA environment completes.

    The trigger configures the ``mwaa_dag_run_complete`` waiter with acceptors derived from the
    requested success and failure states; every remaining ``DagRunState`` value is treated as
    "still in progress" and mapped to a waiter retry.

    :param external_env_name: Name of the external MWAA environment that holds the DAG run to
        wait for (templated)
    :param external_dag_id: ID of the DAG, inside the external MWAA environment, that owns the
        DAG run to wait for (templated)
    :param external_dag_run_id: ID of the DAG run in the external MWAA environment to wait for
        (templated)
    :param success_states: DAG run states that mark this task as successful; defaults to
        ``{airflow.utils.state.DagRunState.SUCCESS}`` when empty or ``None`` (templated)
    :param failure_states: DAG run states that mark this task as failed and raise an
        AirflowException; defaults to ``{airflow.utils.state.DagRunState.FAILED}`` when empty
        or ``None`` (templated)
    :param waiter_delay: Seconds to wait between attempts. (default: 60)
    :param waiter_max_attempts: Maximum number of attempts to make. (default: 720)
    :param aws_conn_id: The Airflow connection used for AWS credentials.
    :raises ValueError: if ``success_states`` and ``failure_states`` share any value.
    """

    def __init__(
        self,
        *,
        external_env_name: str,
        external_dag_id: str,
        external_dag_run_id: str,
        success_states: Collection[str] | None = None,
        failure_states: Collection[str] | None = None,
        waiter_delay: int = 60,
        waiter_max_attempts: int = 720,
        aws_conn_id: str | None = None,
    ) -> None:
        # Fall back to the single default state when the caller gave nothing (None or empty).
        self.success_states = {DagRunState.SUCCESS.value} if not success_states else set(success_states)
        self.failure_states = {DagRunState.FAILED.value} if not failure_states else set(failure_states)

        # A state cannot count as both a success and a failure.
        if not self.success_states.isdisjoint(self.failure_states):
            raise ValueError("success_states and failure_states must not have any values in common")

        # Whatever is neither success nor failure keeps the waiter retrying.
        every_state = {state.value for state in DagRunState}
        pending_states = every_state - self.success_states - self.failure_states

        # The caller's original (un-defaulted) collections are what gets serialized.
        fields_to_serialize = {
            "external_env_name": external_env_name,
            "external_dag_id": external_dag_id,
            "external_dag_run_id": external_dag_run_id,
            "success_states": success_states,
            "failure_states": failure_states,
        }
        rest_api_request = {
            "Name": external_env_name,
            "Path": f"/dags/{external_dag_id}/dagRuns/{external_dag_run_id}",
            "Method": "GET",
        }
        acceptors = _build_waiter_acceptors(
            success_states=self.success_states,
            failure_states=self.failure_states,
            in_progress_states=pending_states,
        )

        super().__init__(
            serialized_fields=fields_to_serialize,
            waiter_name="mwaa_dag_run_complete",
            waiter_args=rest_api_request,
            failure_message=f"The DAG run {external_dag_run_id} of DAG {external_dag_id} in MWAA environment {external_env_name} failed with state",
            status_message="State of DAG run",
            status_queries=["RestApiResponse.state"],
            return_key="dag_run_id",
            return_value=external_dag_run_id,
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
            waiter_config_overrides={"acceptors": acceptors},
        )

    def hook(self) -> AwsGenericHook:
        """Return an ``MwaaHook`` built from this trigger's connection id, region, verify and botocore config."""
        hook_kwargs = {
            "aws_conn_id": self.aws_conn_id,
            "region_name": self.region_name,
            "verify": self.verify,
            "config": self.botocore_config,
        }
        return MwaaHook(**hook_kwargs)
|
107
|
+
|
108
|
+
|
109
|
+
def _build_waiter_acceptors(
|
110
|
+
success_states: set[str], failure_states: set[str], in_progress_states: set[str]
|
111
|
+
) -> list:
|
112
|
+
acceptors = []
|
113
|
+
for state_set, state_waiter_category in (
|
114
|
+
(success_states, "success"),
|
115
|
+
(failure_states, "failure"),
|
116
|
+
(in_progress_states, "retry"),
|
117
|
+
):
|
118
|
+
for dag_run_state in state_set:
|
119
|
+
acceptors.append(
|
120
|
+
{
|
121
|
+
"matcher": "path",
|
122
|
+
"argument": "RestApiResponse.state",
|
123
|
+
"expected": dag_run_state,
|
124
|
+
"state": state_waiter_category,
|
125
|
+
}
|
126
|
+
)
|
127
|
+
|
128
|
+
return acceptors
|