apache-airflow-providers-amazon 9.4.0rc1__py3-none-any.whl → 9.5.0rc1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their public registries. It is provided for informational purposes only.
- airflow/providers/amazon/__init__.py +1 -1
- airflow/providers/amazon/aws/auth_manager/avp/entities.py +1 -1
- airflow/providers/amazon/aws/auth_manager/avp/facade.py +1 -1
- airflow/providers/amazon/aws/auth_manager/aws_auth_manager.py +21 -100
- airflow/providers/amazon/aws/auth_manager/router/login.py +3 -2
- airflow/providers/amazon/aws/auth_manager/user.py +7 -4
- airflow/providers/amazon/aws/hooks/base_aws.py +25 -0
- airflow/providers/amazon/aws/hooks/ec2.py +1 -1
- airflow/providers/amazon/aws/hooks/glue.py +6 -2
- airflow/providers/amazon/aws/hooks/logs.py +2 -2
- airflow/providers/amazon/aws/hooks/mwaa.py +79 -15
- airflow/providers/amazon/aws/hooks/redshift_cluster.py +1 -1
- airflow/providers/amazon/aws/hooks/redshift_data.py +2 -2
- airflow/providers/amazon/aws/hooks/sagemaker.py +1 -1
- airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py +188 -0
- airflow/providers/amazon/aws/links/base_aws.py +7 -1
- airflow/providers/amazon/aws/links/sagemaker_unified_studio.py +27 -0
- airflow/providers/amazon/aws/log/s3_task_handler.py +22 -7
- airflow/providers/amazon/aws/operators/s3.py +147 -157
- airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py +155 -0
- airflow/providers/amazon/aws/sensors/emr.py +1 -1
- airflow/providers/amazon/aws/sensors/mwaa.py +113 -0
- airflow/providers/amazon/aws/sensors/rds.py +10 -5
- airflow/providers/amazon/aws/sensors/s3.py +31 -42
- airflow/providers/amazon/aws/sensors/sagemaker_unified_studio.py +73 -0
- airflow/providers/amazon/aws/triggers/README.md +4 -4
- airflow/providers/amazon/aws/triggers/base.py +1 -1
- airflow/providers/amazon/aws/triggers/ecs.py +6 -2
- airflow/providers/amazon/aws/triggers/eks.py +2 -2
- airflow/providers/amazon/aws/triggers/glue.py +1 -1
- airflow/providers/amazon/aws/triggers/s3.py +31 -6
- airflow/providers/amazon/aws/triggers/sagemaker.py +2 -2
- airflow/providers/amazon/aws/triggers/sagemaker_unified_studio.py +66 -0
- airflow/providers/amazon/aws/triggers/sqs.py +11 -3
- airflow/providers/amazon/aws/{auth_manager/security_manager/__init__.py → utils/sagemaker_unified_studio.py} +12 -0
- airflow/providers/amazon/get_provider_info.py +36 -1
- {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0rc1.dist-info}/METADATA +30 -25
- {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0rc1.dist-info}/RECORD +40 -35
- {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0rc1.dist-info}/WHEEL +1 -1
- airflow/providers/amazon/aws/auth_manager/security_manager/aws_security_manager_override.py +0 -40
- {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0rc1.dist-info}/entry_points.txt +0 -0
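The largest single change below is `airflow/providers/amazon/aws/operators/s3.py`, where every operator is rebased from `BaseOperator` onto `AwsBaseOperator[S3Hook]`. As a rough illustration of that pattern (a sketch only, not code shipped in the package; the class name `MyS3Operator` is made up), the operators now look roughly like this:

```python
# Illustrative sketch of the AwsBaseOperator[S3Hook] pattern used throughout the s3.py diff below.
from __future__ import annotations

from collections.abc import Sequence

from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
from airflow.providers.amazon.aws.utils.mixins import aws_template_fields


class MyS3Operator(AwsBaseOperator[S3Hook]):
    """Hypothetical operator showing the new base-class pattern (not part of the provider)."""

    # The base class builds ``self.hook`` from this class attribute.
    aws_hook_class = S3Hook
    # aws_template_fields() extends the tuple with the shared AWS fields
    # (aws_conn_id, region_name, verify, botocore_config).
    template_fields: Sequence[str] = aws_template_fields("bucket_name")

    def __init__(self, *, bucket_name: str, **kwargs) -> None:
        # aws_conn_id / region_name / verify / botocore_config are consumed by AwsBaseOperator.
        super().__init__(**kwargs)
        self.bucket_name = bucket_name

    def execute(self, context):
        # self.hook is an S3Hook configured from the operator-level AWS arguments.
        if not self.hook.check_for_bucket(self.bucket_name):
            self.hook.create_bucket(bucket_name=self.bucket_name)
```

The diff of `airflow/providers/amazon/aws/operators/s3.py` follows.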
```diff
--- a/airflow/providers/amazon/aws/operators/s3.py
+++ b/airflow/providers/amazon/aws/operators/s3.py
@@ -29,8 +29,9 @@ import pytz
 from dateutil import parser

 from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
+from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
 from airflow.utils.helpers import exactly_one

 if TYPE_CHECKING:
@@ -41,7 +42,7 @@ if TYPE_CHECKING:
 BUCKET_DOES_NOT_EXIST_MSG = "Bucket with name: %s doesn't exist"


-class S3CreateBucketOperator(
+class S3CreateBucketOperator(AwsBaseOperator[S3Hook]):
     """
     This operator creates an S3 bucket.

@@ -51,38 +52,38 @@ class S3CreateBucketOperator(BaseOperator):

     :param bucket_name: This is bucket name you want to create
     :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used. If
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
         running Airflow in a distributed manner and aws_conn_id is None or
         empty, then default boto3 configuration would be used (and must be
         maintained on each worker node).
-    :param region_name: AWS region_name. If not specified
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """

-    template_fields: Sequence[str] = ("bucket_name"
+    template_fields: Sequence[str] = aws_template_fields("bucket_name")
+    aws_hook_class = S3Hook

     def __init__(
         self,
         *,
         bucket_name: str,
-        aws_conn_id: str | None = "aws_default",
-        region_name: str | None = None,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
         self.bucket_name = bucket_name
-        self.region_name = region_name
-        self.aws_conn_id = aws_conn_id

     def execute(self, context: Context):
-
-
-            s3_hook.create_bucket(bucket_name=self.bucket_name, region_name=self.region_name)
+        if not self.hook.check_for_bucket(self.bucket_name):
+            self.hook.create_bucket(bucket_name=self.bucket_name, region_name=self.region_name)
             self.log.info("Created bucket with name: %s", self.bucket_name)
         else:
             self.log.info("Bucket with name: %s already exists", self.bucket_name)


-class S3DeleteBucketOperator(
+class S3DeleteBucketOperator(AwsBaseOperator[S3Hook]):
     """
     This operator deletes an S3 bucket.

@@ -93,36 +94,39 @@ class S3DeleteBucketOperator(BaseOperator):
     :param bucket_name: This is bucket name you want to delete
     :param force_delete: Forcibly delete all objects in the bucket before deleting the bucket
     :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used. If
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
         running Airflow in a distributed manner and aws_conn_id is None or
         empty, then default boto3 configuration would be used (and must be
         maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """

-    template_fields: Sequence[str] = ("bucket_name"
+    template_fields: Sequence[str] = aws_template_fields("bucket_name")
+    aws_hook_class = S3Hook

     def __init__(
         self,
         bucket_name: str,
         force_delete: bool = False,
-        aws_conn_id: str | None = "aws_default",
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
         self.bucket_name = bucket_name
         self.force_delete = force_delete
-        self.aws_conn_id = aws_conn_id

     def execute(self, context: Context):
-
-
-            s3_hook.delete_bucket(bucket_name=self.bucket_name, force_delete=self.force_delete)
+        if self.hook.check_for_bucket(self.bucket_name):
+            self.hook.delete_bucket(bucket_name=self.bucket_name, force_delete=self.force_delete)
             self.log.info("Deleted bucket with name: %s", self.bucket_name)
         else:
             self.log.info("Bucket with name: %s doesn't exist", self.bucket_name)


-class S3GetBucketTaggingOperator(
+class S3GetBucketTaggingOperator(AwsBaseOperator[S3Hook]):
     """
     This operator gets tagging from an S3 bucket.

@@ -132,31 +136,34 @@ class S3GetBucketTaggingOperator(BaseOperator):

     :param bucket_name: This is bucket name you want to reference
     :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used. If
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
         running Airflow in a distributed manner and aws_conn_id is None or
         empty, then default boto3 configuration would be used (and must be
         maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """

-    template_fields: Sequence[str] = ("bucket_name"
+    template_fields: Sequence[str] = aws_template_fields("bucket_name")
+    aws_hook_class = S3Hook

     def __init__(self, bucket_name: str, aws_conn_id: str | None = "aws_default", **kwargs) -> None:
         super().__init__(**kwargs)
         self.bucket_name = bucket_name
-        self.aws_conn_id = aws_conn_id

     def execute(self, context: Context):
-
-
-        if s3_hook.check_for_bucket(self.bucket_name):
+        if self.hook.check_for_bucket(self.bucket_name):
             self.log.info("Getting tags for bucket %s", self.bucket_name)
-            return
+            return self.hook.get_bucket_tagging(self.bucket_name)
         else:
             self.log.warning(BUCKET_DOES_NOT_EXIST_MSG, self.bucket_name)
             return None


-class S3PutBucketTaggingOperator(
+class S3PutBucketTaggingOperator(AwsBaseOperator[S3Hook]):
     """
     This operator puts tagging for an S3 bucket.

@@ -171,14 +178,20 @@ class S3PutBucketTaggingOperator(BaseOperator):
         If a value is provided, a key must be provided as well.
     :param tag_set: A dictionary containing the tags, or a List of key/value pairs.
     :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used. If
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
         running Airflow in a distributed manner and aws_conn_id is None or
-        empty, then
+        empty, then default boto3 configuration would be used (and must be
         maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """

-    template_fields: Sequence[str] = ("bucket_name"
+    template_fields: Sequence[str] = aws_template_fields("bucket_name")
     template_fields_renderers = {"tag_set": "json"}
+    aws_hook_class = S3Hook

     def __init__(
         self,
@@ -186,7 +199,6 @@ class S3PutBucketTaggingOperator(BaseOperator):
         key: str | None = None,
         value: str | None = None,
         tag_set: dict | list[dict[str, str]] | None = None,
-        aws_conn_id: str | None = "aws_default",
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
@@ -194,14 +206,11 @@ class S3PutBucketTaggingOperator(BaseOperator):
         self.value = value
         self.tag_set = tag_set
         self.bucket_name = bucket_name
-        self.aws_conn_id = aws_conn_id

     def execute(self, context: Context):
-
-
-        if s3_hook.check_for_bucket(self.bucket_name):
+        if self.hook.check_for_bucket(self.bucket_name):
             self.log.info("Putting tags for bucket %s", self.bucket_name)
-            return
+            return self.hook.put_bucket_tagging(
                 key=self.key, value=self.value, tag_set=self.tag_set, bucket_name=self.bucket_name
             )
         else:
@@ -209,7 +218,7 @@ class S3PutBucketTaggingOperator(BaseOperator):
             return None


-class S3DeleteBucketTaggingOperator(
+class S3DeleteBucketTaggingOperator(AwsBaseOperator[S3Hook]):
     """
     This operator deletes tagging from an S3 bucket.

@@ -219,31 +228,38 @@ class S3DeleteBucketTaggingOperator(BaseOperator):

     :param bucket_name: This is the name of the bucket to delete tags from.
     :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used. If
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
         running Airflow in a distributed manner and aws_conn_id is None or
         empty, then default boto3 configuration would be used (and must be
         maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """

-    template_fields: Sequence[str] = ("bucket_name"
+    template_fields: Sequence[str] = aws_template_fields("bucket_name")
+    aws_hook_class = S3Hook

-    def __init__(
+    def __init__(
+        self,
+        bucket_name: str,
+        **kwargs,
+    ) -> None:
         super().__init__(**kwargs)
         self.bucket_name = bucket_name
-        self.aws_conn_id = aws_conn_id

     def execute(self, context: Context):
-
-
-        if s3_hook.check_for_bucket(self.bucket_name):
+        if self.hook.check_for_bucket(self.bucket_name):
             self.log.info("Deleting tags for bucket %s", self.bucket_name)
-            return
+            return self.hook.delete_bucket_tagging(self.bucket_name)
         else:
             self.log.warning(BUCKET_DOES_NOT_EXIST_MSG, self.bucket_name)
             return None


-class S3CopyObjectOperator(
+class S3CopyObjectOperator(AwsBaseOperator[S3Hook]):
     """
     Creates a copy of an object that is already stored in S3.

@@ -269,30 +285,29 @@ class S3CopyObjectOperator(BaseOperator):

         It should be omitted when `dest_bucket_key` is provided as a full s3:// url.
     :param source_version_id: Version ID of the source object (OPTIONAL)
-    :param aws_conn_id:
-
-
-
-
-
-
-
-
-
-        You can specify this argument if you want to use a different
-        CA cert bundle than the one used by botocore.
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     :param acl_policy: String specifying the canned ACL policy for the file being
         uploaded to the S3 bucket.
     :param meta_data_directive: Whether to `COPY` the metadata from the source object or `REPLACE` it with
         metadata that's provided in the request.
     """

-    template_fields: Sequence[str] = (
+    template_fields: Sequence[str] = aws_template_fields(
         "source_bucket_key",
         "dest_bucket_key",
         "source_bucket_name",
         "dest_bucket_name",
     )
+    aws_hook_class = S3Hook

     def __init__(
         self,
@@ -302,8 +317,6 @@ class S3CopyObjectOperator(BaseOperator):
         source_bucket_name: str | None = None,
         dest_bucket_name: str | None = None,
         source_version_id: str | None = None,
-        aws_conn_id: str | None = "aws_default",
-        verify: str | bool | None = None,
         acl_policy: str | None = None,
         meta_data_directive: str | None = None,
         **kwargs,
@@ -315,14 +328,11 @@ class S3CopyObjectOperator(BaseOperator):
         self.source_bucket_name = source_bucket_name
         self.dest_bucket_name = dest_bucket_name
         self.source_version_id = source_version_id
-        self.aws_conn_id = aws_conn_id
-        self.verify = verify
         self.acl_policy = acl_policy
         self.meta_data_directive = meta_data_directive

     def execute(self, context: Context):
-
-        s3_hook.copy_object(
+        self.hook.copy_object(
             self.source_bucket_key,
             self.dest_bucket_key,
             self.source_bucket_name,
@@ -336,11 +346,11 @@ class S3CopyObjectOperator(BaseOperator):
         from airflow.providers.common.compat.openlineage.facet import Dataset
         from airflow.providers.openlineage.extractors import OperatorLineage

-        dest_bucket_name, dest_bucket_key =
+        dest_bucket_name, dest_bucket_key = self.hook.get_s3_bucket_key(
             self.dest_bucket_name, self.dest_bucket_key, "dest_bucket_name", "dest_bucket_key"
         )

-        source_bucket_name, source_bucket_key =
+        source_bucket_name, source_bucket_key = self.hook.get_s3_bucket_key(
             self.source_bucket_name, self.source_bucket_key, "source_bucket_name", "source_bucket_key"
         )

@@ -359,7 +369,7 @@ class S3CopyObjectOperator(BaseOperator):
         )


-class S3CreateObjectOperator(
+class S3CreateObjectOperator(AwsBaseOperator[S3Hook]):
     """
     Creates a new object from `data` as string or bytes.

@@ -382,22 +392,21 @@ class S3CreateObjectOperator(BaseOperator):
         It should be specified only when `data` is provided as string.
     :param compression: Type of compression to use, currently only gzip is supported.
         It can be specified only when `data` is provided as string.
-    :param aws_conn_id:
-
-
-
-
-
-
-
-
-
-        You can specify this argument if you want to use a different
-        CA cert bundle than the one used by botocore.
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html

     """

-    template_fields: Sequence[str] = ("s3_bucket", "s3_key", "data")
+    template_fields: Sequence[str] = aws_template_fields("s3_bucket", "s3_key", "data")
+    aws_hook_class = S3Hook

     def __init__(
         self,
@@ -410,8 +419,6 @@ class S3CreateObjectOperator(BaseOperator):
         acl_policy: str | None = None,
         encoding: str | None = None,
         compression: str | None = None,
-        aws_conn_id: str | None = "aws_default",
-        verify: str | bool | None = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -424,16 +431,14 @@ class S3CreateObjectOperator(BaseOperator):
         self.acl_policy = acl_policy
         self.encoding = encoding
         self.compression = compression
-        self.aws_conn_id = aws_conn_id
-        self.verify = verify

     def execute(self, context: Context):
-
-
-
+        s3_bucket, s3_key = self.hook.get_s3_bucket_key(
+            self.s3_bucket, self.s3_key, "dest_bucket", "dest_key"
+        )

         if isinstance(self.data, str):
-
+            self.hook.load_string(
                 self.data,
                 s3_key,
                 s3_bucket,
@@ -444,13 +449,13 @@ class S3CreateObjectOperator(BaseOperator):
                 self.compression,
             )
         else:
-
+            self.hook.load_bytes(self.data, s3_key, s3_bucket, self.replace, self.encrypt, self.acl_policy)

     def get_openlineage_facets_on_start(self):
         from airflow.providers.common.compat.openlineage.facet import Dataset
         from airflow.providers.openlineage.extractors import OperatorLineage

-        bucket, key =
+        bucket, key = self.hook.get_s3_bucket_key(self.s3_bucket, self.s3_key, "dest_bucket", "dest_key")

         output_dataset = Dataset(
             namespace=f"s3://{bucket}",
@@ -462,7 +467,7 @@ class S3CreateObjectOperator(BaseOperator):
         )


-class S3DeleteObjectsOperator(
+class S3DeleteObjectsOperator(AwsBaseOperator[S3Hook]):
     """
     To enable users to delete single object or multiple objects from a bucket using a single HTTP request.

@@ -485,21 +490,22 @@ class S3DeleteObjectsOperator(BaseOperator):
         All objects which LastModified Date is greater than this datetime in the bucket will be deleted.
     :param to_datetime: less LastModified Date of objects to delete. (templated)
         All objects which LastModified Date is less than this datetime in the bucket will be deleted.
-    :param aws_conn_id:
-
-
-
-
-
-
-
-
-
-        You can specify this argument if you want to use a different
-        CA cert bundle than the one used by botocore.
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """

-    template_fields: Sequence[str] = (
+    template_fields: Sequence[str] = aws_template_fields(
+        "keys", "bucket", "prefix", "from_datetime", "to_datetime"
+    )
+    aws_hook_class = S3Hook

     def __init__(
         self,
@@ -509,8 +515,6 @@ class S3DeleteObjectsOperator(BaseOperator):
         prefix: str | None = None,
         from_datetime: datetime | str | None = None,
         to_datetime: datetime | str | None = None,
-        aws_conn_id: str | None = "aws_default",
-        verify: str | bool | None = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -519,8 +523,6 @@ class S3DeleteObjectsOperator(BaseOperator):
         self.prefix = prefix
         self.from_datetime = from_datetime
         self.to_datetime = to_datetime
-        self.aws_conn_id = aws_conn_id
-        self.verify = verify

         self._keys: str | list[str] = ""

@@ -546,16 +548,14 @@ class S3DeleteObjectsOperator(BaseOperator):
         if isinstance(self.from_datetime, str):
             self.from_datetime = parser.parse(self.from_datetime).replace(tzinfo=pytz.UTC)

-
-
-        keys = self.keys or s3_hook.list_keys(
+        keys = self.keys or self.hook.list_keys(
             bucket_name=self.bucket,
             prefix=self.prefix,
             from_datetime=self.from_datetime,
             to_datetime=self.to_datetime,
         )
         if keys:
-
+            self.hook.delete_objects(bucket=self.bucket, keys=keys)
             self._keys = keys

     def get_openlineage_facets_on_complete(self, task_instance):
@@ -598,7 +598,7 @@ class S3DeleteObjectsOperator(BaseOperator):
         )


-class S3FileTransformOperator(
+class S3FileTransformOperator(AwsBaseOperator[S3Hook]):
     """
     Copies data from a source S3 location to a temporary location on the local filesystem.

@@ -644,9 +644,10 @@ class S3FileTransformOperator(BaseOperator):
     :param replace: Replace dest S3 key if it already exists
     """

-    template_fields: Sequence[str] = ("source_s3_key", "dest_s3_key", "script_args")
+    template_fields: Sequence[str] = aws_template_fields("source_s3_key", "dest_s3_key", "script_args")
     template_ext: Sequence[str] = ()
     ui_color = "#f9c915"
+    aws_hook_class = S3Hook

     def __init__(
         self,
@@ -682,6 +683,7 @@ class S3FileTransformOperator(BaseOperator):
         if self.transform_script is None and self.select_expression is None:
             raise AirflowException("Either transform_script or select_expression must be specified")

+        # Keep these hooks constructed here since we are using two unique conn_ids
         source_s3 = S3Hook(aws_conn_id=self.source_aws_conn_id, verify=self.source_verify)
         dest_s3 = S3Hook(aws_conn_id=self.dest_aws_conn_id, verify=self.dest_verify)

@@ -770,7 +772,7 @@ class S3FileTransformOperator(BaseOperator):
         )


-class S3ListOperator(
+class S3ListOperator(AwsBaseOperator[S3Hook]):
     """
     List all objects from the bucket with the given string prefix in name.

@@ -785,17 +787,16 @@ class S3ListOperator(BaseOperator):
     :param prefix: Prefix string to filters the objects whose name begin with
         such prefix. (templated)
     :param delimiter: the delimiter marks key hierarchy. (templated)
-    :param aws_conn_id: The connection
-
-
-
-
-
-
-
-
-
-        CA cert bundle than the one used by botocore.
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     :param apply_wildcard: whether to treat '*' as a wildcard or a plain symbol in the prefix.


@@ -813,8 +814,9 @@ class S3ListOperator(BaseOperator):
         )
     """

-    template_fields: Sequence[str] = ("bucket", "prefix", "delimiter")
+    template_fields: Sequence[str] = aws_template_fields("bucket", "prefix", "delimiter")
     ui_color = "#ffd700"
+    aws_hook_class = S3Hook

     def __init__(
         self,
@@ -822,8 +824,6 @@ class S3ListOperator(BaseOperator):
         bucket: str,
         prefix: str = "",
         delimiter: str = "",
-        aws_conn_id: str | None = "aws_default",
-        verify: str | bool | None = None,
         apply_wildcard: bool = False,
         **kwargs,
     ):
@@ -831,13 +831,9 @@ class S3ListOperator(BaseOperator):
         self.bucket = bucket
         self.prefix = prefix
         self.delimiter = delimiter
-        self.aws_conn_id = aws_conn_id
-        self.verify = verify
         self.apply_wildcard = apply_wildcard

     def execute(self, context: Context):
-        hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
-
         self.log.info(
             "Getting the list of files from bucket: %s in prefix: %s (Delimiter %s)",
             self.bucket,
@@ -845,7 +841,7 @@ class S3ListOperator(BaseOperator):
             self.delimiter,
         )

-        return hook.list_keys(
+        return self.hook.list_keys(
             bucket_name=self.bucket,
             prefix=self.prefix,
             delimiter=self.delimiter,
@@ -853,7 +849,7 @@ class S3ListOperator(BaseOperator):
         )


-class S3ListPrefixesOperator(
+class S3ListPrefixesOperator(AwsBaseOperator[S3Hook]):
     """
     List all subfolders from the bucket with the given string prefix in name.

@@ -868,17 +864,16 @@ class S3ListPrefixesOperator(BaseOperator):
     :param prefix: Prefix string to filter the subfolders whose name begin with
         such prefix. (templated)
     :param delimiter: the delimiter marks subfolder hierarchy. (templated)
-    :param aws_conn_id: The connection
-
-
-
-
-
-
-
-
-
-        CA cert bundle than the one used by botocore.
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html


     **Example**:
@@ -894,8 +889,9 @@ class S3ListPrefixesOperator(BaseOperator):
         )
     """

-    template_fields: Sequence[str] = ("bucket", "prefix", "delimiter")
+    template_fields: Sequence[str] = aws_template_fields("bucket", "prefix", "delimiter")
     ui_color = "#ffd700"
+    aws_hook_class = S3Hook

     def __init__(
         self,
@@ -903,20 +899,14 @@ class S3ListPrefixesOperator(BaseOperator):
         bucket: str,
         prefix: str,
         delimiter: str,
-        aws_conn_id: str | None = "aws_default",
-        verify: str | bool | None = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
         self.bucket = bucket
         self.prefix = prefix
         self.delimiter = delimiter
-        self.aws_conn_id = aws_conn_id
-        self.verify = verify

     def execute(self, context: Context):
-        hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
-
         self.log.info(
             "Getting the list of subfolders from bucket: %s in prefix: %s (Delimiter %s)",
             self.bucket,
@@ -924,4 +914,4 @@ class S3ListPrefixesOperator(BaseOperator):
             self.delimiter,
         )

-        return hook.list_prefixes(bucket_name=self.bucket, prefix=self.prefix, delimiter=self.delimiter)
+        return self.hook.list_prefixes(bucket_name=self.bucket, prefix=self.prefix, delimiter=self.delimiter)
```

Note: a number of removed lines (shown above as bare `-` lines, and a few truncated lines such as `template_fields: Sequence[str] = ("bucket_name"`) were not captured in the extracted rendering of this diff; they are left as-is rather than reconstructed.