apache-airflow-providers-amazon 9.0.0rc1__py3-none-any.whl → 9.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/amazon/__init__.py +1 -1
- airflow/providers/amazon/aws/assets/s3.py +7 -7
- airflow/providers/amazon/aws/hooks/appflow.py +4 -4
- airflow/providers/amazon/aws/hooks/redshift_sql.py +1 -1
- airflow/providers/amazon/aws/hooks/s3.py +77 -29
- airflow/providers/amazon/aws/hooks/sagemaker.py +2 -0
- airflow/providers/amazon/aws/operators/redshift_data.py +43 -20
- airflow/providers/amazon/aws/sensors/sagemaker.py +32 -0
- airflow/providers/amazon/aws/transfers/redshift_to_s3.py +106 -7
- airflow/providers/amazon/aws/transfers/s3_to_redshift.py +1 -1
- airflow/providers/amazon/aws/triggers/ecs.py +6 -6
- airflow/providers/amazon/get_provider_info.py +3 -3
- {apache_airflow_providers_amazon-9.0.0rc1.dist-info → apache_airflow_providers_amazon-9.1.0rc1.dist-info}/METADATA +8 -10
- {apache_airflow_providers_amazon-9.0.0rc1.dist-info → apache_airflow_providers_amazon-9.1.0rc1.dist-info}/RECORD +16 -17
- airflow/providers/amazon/aws/utils/asset_compat_lineage_collector.py +0 -106
- {apache_airflow_providers_amazon-9.0.0rc1.dist-info → apache_airflow_providers_amazon-9.1.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_amazon-9.0.0rc1.dist-info → apache_airflow_providers_amazon-9.1.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/amazon/__init__.py
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "9.0.0"
+__version__ = "9.1.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.8.0"
airflow/providers/amazon/aws/assets/s3.py
@@ -19,16 +19,14 @@ from __future__ import annotations
 from typing import TYPE_CHECKING
 
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-
-try:
-    from airflow.assets import Asset
-except ModuleNotFoundError:
-    from airflow.datasets import Dataset as Asset  # type: ignore[no-redef]
+from airflow.providers.common.compat.assets import Asset
 
 if TYPE_CHECKING:
     from urllib.parse import SplitResult
 
-    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset
+    from airflow.providers.common.compat.openlineage.facet import (
+        Dataset as OpenLineageDataset,
+    )
 
 
 def create_asset(*, bucket: str, key: str, extra=None) -> Asset:
@@ -43,7 +41,9 @@ def sanitize_uri(uri: SplitResult) -> SplitResult:
 
 def convert_asset_to_openlineage(asset: Asset, lineage_context) -> OpenLineageDataset:
     """Translate Asset with valid AIP-60 uri to OpenLineage with assistance from the hook."""
-    from airflow.providers.common.compat.openlineage.facet import Dataset as OpenLineageDataset
+    from airflow.providers.common.compat.openlineage.facet import (
+        Dataset as OpenLineageDataset,
+    )
 
     bucket, key = S3Hook.parse_s3_url(asset.uri)
     return OpenLineageDataset(namespace=f"s3://{bucket}", name=key if key else "/")
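The two hunks above replace the local try/except import shim with the shared `airflow.providers.common.compat.assets` alias (which is why the minimum `apache-airflow-providers-common-compat` version is raised later in this diff). A minimal usage sketch of the module's factory; the bucket and key names are illustrative, not taken from the package:

    from airflow.providers.amazon.aws.assets.s3 import create_asset

    # `Asset` resolves to airflow.assets.Asset or airflow.datasets.Dataset
    # depending on the installed Airflow version, via the common.compat alias.
    asset = create_asset(bucket="example-bucket", key="data/file.csv")
    print(asset.uri)  # expected to be "s3://example-bucket/data/file.csv"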
airflow/providers/amazon/aws/hooks/appflow.py
@@ -117,9 +117,9 @@ class AppflowHook(AwsGenericHook["AppflowClient"]):
 
         self.conn.update_flow(
             flowName=response["flowName"],
-            destinationFlowConfigList=response["destinationFlowConfigList"],
-            sourceFlowConfig=response["sourceFlowConfig"],
-            triggerConfig=response["triggerConfig"],
+            destinationFlowConfigList=response["destinationFlowConfigList"],  # type: ignore[arg-type]
+            sourceFlowConfig=response["sourceFlowConfig"],  # type: ignore[arg-type]
+            triggerConfig=response["triggerConfig"],  # type: ignore[arg-type]
             description=response.get("description", "Flow description."),
-            tasks=tasks,
+            tasks=tasks,  # type: ignore[arg-type]
         )
airflow/providers/amazon/aws/hooks/redshift_sql.py
@@ -163,7 +163,7 @@ class RedshiftSQLHook(DbApiHook):
         # Compatibility: The 'create' factory method was added in SQLAlchemy 1.4
         # to replace calling the default URL constructor directly.
         create_url = getattr(URL, "create", URL)
-        return str(create_url(drivername="redshift+redshift_connector", **conn_params))
+        return str(create_url(drivername="postgresql", **conn_params))
 
     def get_sqlalchemy_engine(self, engine_kwargs=None):
         """Overridden to pass Redshift-specific arguments."""
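The drivername switch above pairs with dropping the `sqlalchemy_redshift` dependency later in this diff: the SQLAlchemy URL is now built with the stock `postgresql` dialect rather than a Redshift-specific one. A hedged sketch of the observable effect, assuming the standard `get_uri()` accessor and an illustrative connection id:

    from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook

    hook = RedshiftSQLHook(redshift_conn_id="redshift_default")
    # After this change the URI is expected to start with "postgresql://",
    # so no extra SQLAlchemy dialect package is required.
    print(hook.get_uri())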
airflow/providers/amazon/aws/hooks/s3.py
@@ -41,14 +41,16 @@ from urllib.parse import urlsplit
 from uuid import uuid4
 
 if TYPE_CHECKING:
-    from mypy_boto3_s3.service_resource import Bucket as S3Bucket, Object as S3ResourceObject
+    from mypy_boto3_s3.service_resource import (
+        Bucket as S3Bucket,
+        Object as S3ResourceObject,
+    )
 
     from airflow.utils.types import ArgNotSet
 
     with suppress(ImportError):
         from aiobotocore.client import AioBaseClient
 
-from importlib.util import find_spec
 
 from asgiref.sync import sync_to_async
 from boto3.s3.transfer import S3Transfer, TransferConfig
@@ -58,15 +60,9 @@ from airflow.exceptions import AirflowException, AirflowNotFoundException
 from airflow.providers.amazon.aws.exceptions import S3HookUriParseFailure
 from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
 from airflow.providers.amazon.aws.utils.tags import format_tags
+from airflow.providers.common.compat.lineage.hook import get_hook_lineage_collector
 from airflow.utils.helpers import chunks
 
-if find_spec("airflow.assets"):
-    from airflow.lineage.hook import get_hook_lineage_collector
-else:
-    # TODO: import from common.compat directly after common.compat providers with
-    # asset_compat_lineage_collector released
-    from airflow.providers.amazon.aws.utils.asset_compat_lineage_collector import get_hook_lineage_collector
-
 logger = logging.getLogger(__name__)
 
 
@@ -148,9 +144,10 @@ def unify_bucket_name_and_key(func: Callable) -> Callable:
 
         if "bucket_name" not in bound_args.arguments:
             with suppress(S3HookUriParseFailure):
-                bound_args.arguments["bucket_name"], bound_args.arguments[key_name] = S3Hook.parse_s3_url(
-                    bound_args.arguments[key_name]
-                )
+                (
+                    bound_args.arguments["bucket_name"],
+                    bound_args.arguments[key_name],
+                ) = S3Hook.parse_s3_url(bound_args.arguments[key_name])
 
         return func(*bound_args.args, **bound_args.kwargs)
 
@@ -318,7 +315,8 @@ class S3Hook(AwsBaseHook):
                 self.log.info('Bucket "%s" does not exist', bucket_name)
             elif return_code == 403:
                 self.log.error(
-                    'Access to bucket "%s" is forbidden or there was an error with the request', bucket_name
+                    'Access to bucket "%s" is forbidden or there was an error with the request',
+                    bucket_name,
                 )
             self.log.error(e)
             return False
@@ -359,7 +357,8 @@ class S3Hook(AwsBaseHook):
             self.get_conn().create_bucket(Bucket=bucket_name)
         else:
             self.get_conn().create_bucket(
-                Bucket=bucket_name, CreateBucketConfiguration={"LocationConstraint": region_name}
+                Bucket=bucket_name,
+                CreateBucketConfiguration={"LocationConstraint": region_name},
             )
 
     @provide_bucket_name
@@ -410,7 +409,10 @@ class S3Hook(AwsBaseHook):
 
         paginator = self.get_conn().get_paginator("list_objects_v2")
         response = paginator.paginate(
-            Bucket=bucket_name, Prefix=prefix, Delimiter=delimiter, PaginationConfig=config
+            Bucket=bucket_name,
+            Prefix=prefix,
+            Delimiter=delimiter,
+            PaginationConfig=config,
         )
 
         prefixes: list[str] = []
@@ -471,7 +473,10 @@ class S3Hook(AwsBaseHook):
 
         paginator = client.get_paginator("list_objects_v2")
         response = paginator.paginate(
-            Bucket=bucket_name, Prefix=prefix, Delimiter=delimiter, PaginationConfig=config
+            Bucket=bucket_name,
+            Prefix=prefix,
+            Delimiter=delimiter,
+            PaginationConfig=config,
        )
 
         prefixes = []
@@ -569,7 +574,11 @@ class S3Hook(AwsBaseHook):
         return await self._check_key_async(client, bucket, wildcard_match, bucket_keys, use_regex)
 
     async def check_for_prefix_async(
-        self, client: AioBaseClient, prefix: str, delimiter: str, bucket_name: str | None = None
+        self,
+        client: AioBaseClient,
+        prefix: str,
+        delimiter: str,
+        bucket_name: str | None = None,
     ) -> bool:
         """
         Check that a prefix exists in a bucket.
@@ -587,7 +596,11 @@ class S3Hook(AwsBaseHook):
         return prefix in plist
 
     async def _check_for_prefix_async(
-        self, client: AioBaseClient, prefix: str, delimiter: str, bucket_name: str | None = None
+        self,
+        client: AioBaseClient,
+        prefix: str,
+        delimiter: str,
+        bucket_name: str | None = None,
     ) -> bool:
         return await self.check_for_prefix_async(
             client, prefix=prefix, delimiter=delimiter, bucket_name=bucket_name
@@ -643,7 +656,10 @@ class S3Hook(AwsBaseHook):
 
         paginator = client.get_paginator("list_objects_v2")
         response = paginator.paginate(
-            Bucket=bucket_name, Prefix=prefix, Delimiter=delimiter, PaginationConfig=config
+            Bucket=bucket_name,
+            Prefix=prefix,
+            Delimiter=delimiter,
+            PaginationConfig=config,
         )
 
         keys = []
@@ -655,7 +671,10 @@ class S3Hook(AwsBaseHook):
         return keys
 
     def _list_key_object_filter(
-        self, keys: list, from_datetime: datetime | None = None, to_datetime: datetime | None = None
+        self,
+        keys: list,
+        from_datetime: datetime | None = None,
+        to_datetime: datetime | None = None,
     ) -> list:
         def _is_in_period(input_date: datetime) -> bool:
             if from_datetime is not None and input_date <= from_datetime:
@@ -766,7 +785,10 @@ class S3Hook(AwsBaseHook):
                     "message": success_message,
                 }
 
-            self.log.error("FAILURE: Inactivity Period passed, not enough objects found in %s", path)
+            self.log.error(
+                "FAILURE: Inactivity Period passed, not enough objects found in %s",
+                path,
+            )
             return {
                 "status": "error",
                 "message": f"FAILURE: Inactivity Period passed, not enough objects found in {path}",
@@ -1109,7 +1131,13 @@ class S3Hook(AwsBaseHook):
             extra_args["ACL"] = acl_policy
 
         client = self.get_conn()
-        client.upload_file(filename, bucket_name, key, ExtraArgs=extra_args, Config=self.transfer_config)
+        client.upload_file(
+            filename,
+            bucket_name,
+            key,
+            ExtraArgs=extra_args,
+            Config=self.transfer_config,
+        )
         get_hook_lineage_collector().add_input_asset(
             context=self, scheme="file", asset_kwargs={"path": filename}
         )
@@ -1308,18 +1336,32 @@ class S3Hook(AwsBaseHook):
         )
 
         source_bucket_name, source_bucket_key = self.get_s3_bucket_key(
-            source_bucket_name, source_bucket_key, "source_bucket_name", "source_bucket_key"
+            source_bucket_name,
+            source_bucket_key,
+            "source_bucket_name",
+            "source_bucket_key",
        )
 
-        copy_source = {"Bucket": source_bucket_name, "Key": source_bucket_key, "VersionId": source_version_id}
+        copy_source = {
+            "Bucket": source_bucket_name,
+            "Key": source_bucket_key,
+            "VersionId": source_version_id,
+        }
         response = self.get_conn().copy_object(
-            Bucket=dest_bucket_name, Key=dest_bucket_key, CopySource=copy_source, **kwargs
+            Bucket=dest_bucket_name,
+            Key=dest_bucket_key,
+            CopySource=copy_source,
+            **kwargs,
         )
         get_hook_lineage_collector().add_input_asset(
-            context=self, scheme="s3", asset_kwargs={"bucket": source_bucket_name, "key": source_bucket_key}
+            context=self,
+            scheme="s3",
+            asset_kwargs={"bucket": source_bucket_name, "key": source_bucket_key},
        )
         get_hook_lineage_collector().add_output_asset(
-            context=self, scheme="s3", asset_kwargs={"bucket": dest_bucket_name, "key": dest_bucket_key}
+            context=self,
+            scheme="s3",
+            asset_kwargs={"bucket": dest_bucket_name, "key": dest_bucket_key},
        )
         return response
 
@@ -1435,7 +1477,10 @@ class S3Hook(AwsBaseHook):
             file_path = Path(local_dir, subdir, filename_in_s3)
 
             if file_path.is_file():
-                self.log.error("file '%s' already exists. Failing the task and not overwriting it", file_path)
+                self.log.error(
+                    "file '%s' already exists. Failing the task and not overwriting it",
+                    file_path,
+                )
                 raise FileExistsError
 
             file_path.parent.mkdir(exist_ok=True, parents=True)
@@ -1484,7 +1529,10 @@ class S3Hook(AwsBaseHook):
         s3_client = self.get_conn()
         try:
             return s3_client.generate_presigned_url(
-                ClientMethod=client_method, Params=params, ExpiresIn=expires_in, HttpMethod=http_method
+                ClientMethod=client_method,
+                Params=params,
+                ExpiresIn=expires_in,
+                HttpMethod=http_method,
             )
 
         except ClientError as e:
airflow/providers/amazon/aws/hooks/sagemaker.py
@@ -153,7 +153,9 @@ class SageMakerHook(AwsBaseHook):
     non_terminal_states = {"InProgress", "Stopping"}
     endpoint_non_terminal_states = {"Creating", "Updating", "SystemUpdating", "RollingBack", "Deleting"}
     pipeline_non_terminal_states = {"Executing", "Stopping"}
+    processing_job_non_terminal_states = {"InProgress", "Stopping"}
     failed_states = {"Failed"}
+    processing_job_failed_states = {*failed_states, "Stopped"}
     training_failed_states = {*failed_states, "Stopped"}
 
     def __init__(self, *args, **kwargs):
airflow/providers/amazon/aws/operators/redshift_data.py
@@ -28,7 +28,10 @@ from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
 
 if TYPE_CHECKING:
-    from mypy_boto3_redshift_data.type_defs import GetStatementResultResponseTypeDef
+    from mypy_boto3_redshift_data.type_defs import (
+        DescribeStatementResponseTypeDef,
+        GetStatementResultResponseTypeDef,
+    )
 
     from airflow.utils.context import Context
 
@@ -37,7 +40,7 @@ class RedshiftDataOperator(AwsBaseOperator[RedshiftDataHook]):
     """
     Executes SQL Statements against an Amazon Redshift cluster using Redshift Data.
 
-    .. seealso::
+    ... see also::
         For more information on how to use this operator, take a look at the guide:
         :ref:`howto/operator:RedshiftDataOperator`
 
@@ -84,7 +87,6 @@ class RedshiftDataOperator(AwsBaseOperator[RedshiftDataHook]):
     )
     template_ext = (".sql",)
     template_fields_renderers = {"sql": "sql"}
-    statement_id: str | None
 
     def __init__(
         self,
@@ -124,12 +126,11 @@ class RedshiftDataOperator(AwsBaseOperator[RedshiftDataHook]):
                 poll_interval,
             )
         self.return_sql_result = return_sql_result
-        self.statement_id: str | None = None
         self.deferrable = deferrable
         self.session_id = session_id
         self.session_keep_alive_seconds = session_keep_alive_seconds
 
-    def execute(self, context: Context) -> GetStatementResultResponseTypeDef | str:
+    def execute(self, context: Context) -> list[GetStatementResultResponseTypeDef] | list[str]:
         """Execute a statement against Amazon Redshift."""
         self.log.info("Executing statement: %s", self.sql)
 
@@ -154,13 +155,14 @@ class RedshiftDataOperator(AwsBaseOperator[RedshiftDataHook]):
             session_keep_alive_seconds=self.session_keep_alive_seconds,
         )
 
-        self.statement_id = query_execution_output.statement_id
+        # Pull the statement ID, session ID
+        self.statement_id: str = query_execution_output.statement_id
 
         if query_execution_output.session_id:
             self.xcom_push(context, key="session_id", value=query_execution_output.session_id)
 
         if self.deferrable and self.wait_for_completion:
-            is_finished = self.hook.check_query_is_finished(self.statement_id)
+            is_finished: bool = self.hook.check_query_is_finished(self.statement_id)
             if not is_finished:
                 self.defer(
                     timeout=self.execution_timeout,
@@ -176,16 +178,13 @@ class RedshiftDataOperator(AwsBaseOperator[RedshiftDataHook]):
                     method_name="execute_complete",
                 )
 
-        if self.return_sql_result:
-            result = self.hook.conn.get_statement_result(Id=self.statement_id)
-            self.log.debug("Statement result: %s", result)
-            return result
-        else:
-            return self.statement_id
+        # Use the get_sql_results method to return the results of the SQL query, or the statement_ids,
+        # depending on the value of self.return_sql_result
+        return self.get_sql_results(statement_id=self.statement_id, return_sql_result=self.return_sql_result)
 
     def execute_complete(
         self, context: Context, event: dict[str, Any] | None = None
-    ) -> GetStatementResultResponseTypeDef | str:
+    ) -> list[GetStatementResultResponseTypeDef] | list[str]:
         event = validate_execute_complete_event(event)
 
         if event["status"] == "error":
@@ -197,16 +196,40 @@ class RedshiftDataOperator(AwsBaseOperator[RedshiftDataHook]):
             raise AirflowException("statement_id should not be empty.")
 
         self.log.info("%s completed successfully.", self.task_id)
-        if self.return_sql_result:
-            result = self.hook.conn.get_statement_result(Id=statement_id)
-            self.log.debug("Statement result: %s", result)
-            return result
 
-        return statement_id
+        # Use the get_sql_results method to return the results of the SQL query, or the statement_ids,
+        # depending on the value of self.return_sql_result
+        return self.get_sql_results(statement_id=statement_id, return_sql_result=self.return_sql_result)
+
+    def get_sql_results(
+        self, statement_id: str, return_sql_result: bool
+    ) -> list[GetStatementResultResponseTypeDef] | list[str]:
+        """
+        Retrieve either the result of the SQL query, or the statement ID(s).
+
+        :param statement_id: Statement ID of the running queries
+        :param return_sql_result: Boolean, true if results should be returned
+        """
+        # ISSUE-40427: Pull the statement, and check to see if there are sub-statements. If that is the
+        # case, pull each of the sub-statement ID's, and grab the results. Otherwise, just use statement_id
+        statement: DescribeStatementResponseTypeDef = self.hook.conn.describe_statement(Id=statement_id)
+        statement_ids: list[str] = (
+            [sub_statement["Id"] for sub_statement in statement["SubStatements"]]
+            if len(statement.get("SubStatements", [])) > 0
+            else [statement_id]
+        )
+
+        # If returning the SQL result, use get_statement_result to return the records for each query
+        if return_sql_result:
+            results: list = [self.hook.conn.get_statement_result(Id=sid) for sid in statement_ids]
+            self.log.debug("Statement result(s): %s", results)
+            return results
+        else:
+            return statement_ids
 
     def on_kill(self) -> None:
         """Cancel the submitted redshift query."""
-        if self.statement_id:
+        if hasattr(self, "statement_id"):
             self.log.info("Received a kill signal.")
             self.log.info("Stopping Query with statementId - %s", self.statement_id)
 
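The hunks above change the return type of `execute`/`execute_complete` from a single value to a list and route both through the new `get_sql_results`, which expands sub-statements when several SQL statements run under one statement id (ISSUE-40427). A hedged usage sketch; the identifiers, database and SQL are illustrative:

    from airflow.providers.amazon.aws.operators.redshift_data import RedshiftDataOperator

    select_data = RedshiftDataOperator(
        task_id="select_data",
        cluster_identifier="example-cluster",
        database="dev",
        db_user="awsuser",
        sql="SELECT 1; SELECT 2;",  # two statements -> two sub-statement ids
        return_sql_result=True,     # now returns a list of GetStatementResult responses
        wait_for_completion=True,
    )
    # With return_sql_result=False the operator instead returns the list of statement ids.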
airflow/providers/amazon/aws/sensors/sagemaker.py
@@ -330,3 +330,35 @@ class SageMakerAutoMLSensor(SageMakerBaseSensor):
 
     def state_from_response(self, response: dict) -> str:
         return response["AutoMLJobStatus"]
+
+
+class SageMakerProcessingSensor(SageMakerBaseSensor):
+    """
+    Poll the processing job until it reaches a terminal state; raise AirflowException with the failure reason.
+
+    .. seealso::
+        For more information on how to use this sensor, take a look at the guide:
+        :ref:`howto/sensor:SageMakerProcessingSensor`
+
+    :param job_name: Name of the processing job to watch.
+    """
+
+    template_fields: Sequence[str] = ("job_name",)
+    template_ext: Sequence[str] = ()
+
+    def __init__(self, *, job_name: str, **kwargs):
+        super().__init__(**kwargs)
+        self.job_name = job_name
+
+    def non_terminal_states(self) -> set[str]:
+        return SageMakerHook.processing_job_non_terminal_states
+
+    def failed_states(self) -> set[str]:
+        return SageMakerHook.processing_job_failed_states
+
+    def get_sagemaker_response(self) -> dict:
+        self.log.info("Poking Sagemaker ProcessingJob %s", self.job_name)
+        return self.hook.describe_processing_job(self.job_name)
+
+    def state_from_response(self, response: dict) -> str:
+        return response["ProcessingJobStatus"]
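The new `SageMakerProcessingSensor` mirrors the other SageMaker sensors and relies on the `processing_job_*` state sets added to `SageMakerHook` earlier in this diff. A minimal usage sketch; the task id and job name are illustrative:

    from airflow.providers.amazon.aws.sensors.sagemaker import SageMakerProcessingSensor

    wait_for_processing = SageMakerProcessingSensor(
        task_id="wait_for_processing",
        job_name="example-processing-job",
        poke_interval=60,
    )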
airflow/providers/amazon/aws/transfers/redshift_to_s3.py
@@ -152,6 +152,10 @@ class RedshiftToS3Operator(BaseOperator):
             table = self.table
         return f"SELECT * FROM {table}"
 
+    @property
+    def use_redshift_data(self):
+        return bool(self.redshift_data_api_kwargs)
+
     def execute(self, context: Context) -> None:
         if self.table and self.table_as_file_name:
             self.s3_key = f"{self.s3_key}/{self.table}_"
@@ -164,14 +168,13 @@ class RedshiftToS3Operator(BaseOperator):
         if self.include_header and "HEADER" not in [uo.upper().strip() for uo in self.unload_options]:
             self.unload_options = [*self.unload_options, "HEADER"]
 
-        redshift_hook: RedshiftDataHook | RedshiftSQLHook
-        if self.redshift_data_api_kwargs:
-            redshift_hook = RedshiftDataHook(aws_conn_id=self.redshift_conn_id)
+        if self.use_redshift_data:
+            redshift_data_hook = RedshiftDataHook(aws_conn_id=self.redshift_conn_id)
             for arg in ["sql", "parameters"]:
                 if arg in self.redshift_data_api_kwargs:
                     raise AirflowException(f"Cannot include param '{arg}' in Redshift Data API kwargs")
         else:
-            redshift_hook = RedshiftSQLHook(redshift_conn_id=self.redshift_conn_id)
+            redshift_sql_hook = RedshiftSQLHook(redshift_conn_id=self.redshift_conn_id)
         conn = S3Hook.get_connection(conn_id=self.aws_conn_id) if self.aws_conn_id else None
         if conn and conn.extra_dejson.get("role_arn", False):
             credentials_block = f"aws_iam_role={conn.extra_dejson['role_arn']}"
@@ -187,10 +190,106 @@ class RedshiftToS3Operator(BaseOperator):
         )
 
         self.log.info("Executing UNLOAD command...")
-        if isinstance(redshift_hook, RedshiftDataHook):
-            redshift_hook.execute_query(
+        if self.use_redshift_data:
+            redshift_data_hook.execute_query(
                 sql=unload_query, parameters=self.parameters, **self.redshift_data_api_kwargs
             )
         else:
-            redshift_hook.run(unload_query, self.autocommit, parameters=self.parameters)
+            redshift_sql_hook.run(unload_query, self.autocommit, parameters=self.parameters)
         self.log.info("UNLOAD command complete...")
+
+    def get_openlineage_facets_on_complete(self, task_instance):
+        """Implement on_complete as we may query for table details."""
+        from airflow.providers.amazon.aws.utils.openlineage import (
+            get_facets_from_redshift_table,
+            get_identity_column_lineage_facet,
+        )
+        from airflow.providers.common.compat.openlineage.facet import (
+            Dataset,
+            Error,
+            ExtractionErrorRunFacet,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        output_dataset = Dataset(
+            namespace=f"s3://{self.s3_bucket}",
+            name=self.s3_key,
+        )
+
+        if self.use_redshift_data:
+            redshift_data_hook = RedshiftDataHook(aws_conn_id=self.redshift_conn_id)
+            database = self.redshift_data_api_kwargs.get("database")
+            identifier = self.redshift_data_api_kwargs.get(
+                "cluster_identifier", self.redshift_data_api_kwargs.get("workgroup_name")
+            )
+            port = self.redshift_data_api_kwargs.get("port", "5439")
+            authority = f"{identifier}.{redshift_data_hook.region_name}:{port}"
+        else:
+            redshift_sql_hook = RedshiftSQLHook(redshift_conn_id=self.redshift_conn_id)
+            database = redshift_sql_hook.conn.schema
+            authority = redshift_sql_hook.get_openlineage_database_info(redshift_sql_hook.conn).authority
+
+        if self.select_query == self.default_select_query:
+            if self.use_redshift_data:
+                input_dataset_facets = get_facets_from_redshift_table(
+                    redshift_data_hook, self.table, self.redshift_data_api_kwargs, self.schema
+                )
+            else:
+                input_dataset_facets = get_facets_from_redshift_table(
+                    redshift_sql_hook, self.table, {}, self.schema
+                )
+
+            input_dataset = Dataset(
+                namespace=f"redshift://{authority}",
+                name=f"{database}.{self.schema}.{self.table}" if database else f"{self.schema}.{self.table}",
+                facets=input_dataset_facets,
+            )
+
+            # If default select query is used (SELECT *) output file matches the input table.
+            output_dataset.facets = {
+                "schema": input_dataset_facets["schema"],
+                "columnLineage": get_identity_column_lineage_facet(
+                    field_names=[field.name for field in input_dataset_facets["schema"].fields],
+                    input_datasets=[input_dataset],
+                ),
+            }
+
+            return OperatorLineage(inputs=[input_dataset], outputs=[output_dataset])
+
+        try:
+            from airflow.providers.openlineage.sqlparser import SQLParser, from_table_meta
+        except ImportError:
+            return OperatorLineage(outputs=[output_dataset])
+
+        run_facets = {}
+        parse_result = SQLParser(dialect="redshift", default_schema=self.schema).parse(self.select_query)
+        if parse_result.errors:
+            run_facets["extractionError"] = ExtractionErrorRunFacet(
+                totalTasks=1,
+                failedTasks=1,
+                errors=[
+                    Error(
+                        errorMessage=error.message,
+                        stackTrace=None,
+                        task=error.origin_statement,
+                        taskNumber=error.index,
+                    )
+                    for error in parse_result.errors
+                ],
+            )
+
+        input_datasets = []
+        for in_tb in parse_result.in_tables:
+            ds = from_table_meta(in_tb, database, f"redshift://{authority}", False)
+            schema, table = ds.name.split(".")[-2:]
+            if self.use_redshift_data:
+                input_dataset_facets = get_facets_from_redshift_table(
+                    redshift_data_hook, table, self.redshift_data_api_kwargs, schema
+                )
+            else:
+                input_dataset_facets = get_facets_from_redshift_table(redshift_sql_hook, table, {}, schema)
+
+            ds.facets = input_dataset_facets
+            input_datasets.append(ds)
+
+        return OperatorLineage(inputs=input_datasets, outputs=[output_dataset], run_facets=run_facets)
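The new `use_redshift_data` property drives both the UNLOAD execution path and the new `get_openlineage_facets_on_complete` method: any non-empty `redshift_data_api_kwargs` routes the work through `RedshiftDataHook`, otherwise `RedshiftSQLHook` is used. A hedged sketch; the names and kwargs are illustrative:

    from airflow.providers.amazon.aws.transfers.redshift_to_s3 import RedshiftToS3Operator

    unload_orders = RedshiftToS3Operator(
        task_id="unload_orders",
        schema="public",
        table="orders",
        s3_bucket="example-bucket",
        s3_key="exports",
        # Non-empty kwargs -> use_redshift_data is True and the UNLOAD goes
        # through the Redshift Data API instead of a SQL connection.
        redshift_data_api_kwargs={
            "database": "dev",
            "cluster_identifier": "example-cluster",
            "db_user": "awsuser",
        },
    )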
airflow/providers/amazon/aws/transfers/s3_to_redshift.py
@@ -241,7 +241,7 @@ class S3ToRedshiftOperator(BaseOperator):
 
         output_dataset = Dataset(
             namespace=f"redshift://{authority}",
-            name=f"{database}.{self.schema}.{self.table}",
+            name=f"{database}.{self.schema}.{self.table}" if database else f"{self.schema}.{self.table}",
             facets=output_dataset_facets,
         )
 
airflow/providers/amazon/aws/triggers/ecs.py
@@ -18,7 +18,8 @@
 from __future__ import annotations
 
 import asyncio
-from typing import TYPE_CHECKING, Any, AsyncIterator
+from collections.abc import AsyncIterator
+from typing import TYPE_CHECKING, Any
 
 from botocore.exceptions import ClientError, WaiterError
 
@@ -165,11 +166,10 @@ class TaskDoneTrigger(BaseTrigger):
         )
 
     async def run(self) -> AsyncIterator[TriggerEvent]:
-        async with EcsHook(
-            aws_conn_id=self.aws_conn_id, region_name=self.region
-        ).async_conn as ecs_client, AwsLogsHook(
-            aws_conn_id=self.aws_conn_id, region_name=self.region
-        ).async_conn as logs_client:
+        async with (
+            EcsHook(aws_conn_id=self.aws_conn_id, region_name=self.region).async_conn as ecs_client,
+            AwsLogsHook(aws_conn_id=self.aws_conn_id, region_name=self.region).async_conn as logs_client,
+        ):
             waiter = ecs_client.get_waiter("tasks_stopped")
             logs_token = None
             while self.waiter_max_attempts:
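The trigger's `run` now opens the ECS and CloudWatch Logs clients in a single parenthesized `async with` block instead of the chained multi-line form. A self-contained illustration of the same pattern with hypothetical context managers (not part of the provider), valid on the Python versions the provider supports:

    import asyncio
    from contextlib import asynccontextmanager


    @asynccontextmanager
    async def open_client(name):
        # Hypothetical stand-in for EcsHook(...).async_conn / AwsLogsHook(...).async_conn.
        yield f"{name}-client"


    async def main():
        # Parenthesized form, equivalent to nesting the two `async with` blocks.
        async with (
            open_client("ecs") as ecs_client,
            open_client("logs") as logs_client,
        ):
            print(ecs_client, logs_client)


    asyncio.run(main())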
airflow/providers/amazon/get_provider_info.py
@@ -28,8 +28,9 @@ def get_provider_info():
         "name": "Amazon",
         "description": "Amazon integration (including `Amazon Web Services (AWS) <https://aws.amazon.com/>`__).\n",
         "state": "ready",
-        "source-date-epoch":
+        "source-date-epoch": 1730011042,
         "versions": [
+            "9.1.0",
             "9.0.0",
             "8.29.0",
             "8.28.0",
@@ -98,7 +99,7 @@ def get_provider_info():
         ],
         "dependencies": [
             "apache-airflow>=2.8.0",
-            "apache-airflow-providers-common-compat>=1.1.0",
+            "apache-airflow-providers-common-compat>=1.2.1",
             "apache-airflow-providers-common-sql>=1.3.1",
             "apache-airflow-providers-http",
             "boto3>=1.34.90",
@@ -107,7 +108,6 @@ def get_provider_info():
             "watchtower>=3.0.0,!=3.3.0,<4",
             "jsonpath_ng>=1.5.3",
             "redshift_connector>=2.0.918",
-            "sqlalchemy_redshift>=0.8.6",
             "asgiref>=2.3.0",
             "PyAthena>=3.0.10",
             "jmespath>=0.7.0",
{apache_airflow_providers_amazon-9.0.0rc1.dist-info → apache_airflow_providers_amazon-9.1.0rc1.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-amazon
-Version: 9.0.0rc1
+Version: 9.1.0rc1
 Summary: Provider package apache-airflow-providers-amazon for Apache Airflow
 Keywords: airflow-provider,amazon,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -21,7 +21,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: System :: Monitoring
 Requires-Dist: PyAthena>=3.0.10
-Requires-Dist: apache-airflow-providers-common-compat>=1.1.0rc0
+Requires-Dist: apache-airflow-providers-common-compat>=1.2.1rc0
 Requires-Dist: apache-airflow-providers-common-sql>=1.3.1rc0
 Requires-Dist: apache-airflow-providers-http
 Requires-Dist: apache-airflow>=2.8.0rc0
@@ -33,7 +33,6 @@ Requires-Dist: jmespath>=0.7.0
 Requires-Dist: jsonpath_ng>=1.5.3
 Requires-Dist: python3-saml>=1.16.0
 Requires-Dist: redshift_connector>=2.0.918
-Requires-Dist: sqlalchemy_redshift>=0.8.6
 Requires-Dist: watchtower>=3.0.0,!=3.3.0,<4
 Requires-Dist: aiobotocore[boto3]>=2.13.0 ; extra == "aiobotocore"
 Requires-Dist: apache-airflow-providers-apache-hive ; extra == "apache.hive"
@@ -55,8 +54,8 @@ Requires-Dist: s3fs>=2023.10.0 ; extra == "s3fs"
 Requires-Dist: apache-airflow-providers-salesforce ; extra == "salesforce"
 Requires-Dist: apache-airflow-providers-ssh ; extra == "ssh"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-amazon/9.0.0/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-amazon/9.0.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-amazon/9.1.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-amazon/9.1.0
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Twitter, https://twitter.com/ApacheAirflow
@@ -124,7 +123,7 @@ Provides-Extra: ssh
 
 Package ``apache-airflow-providers-amazon``
 
-Release: ``9.0.0.rc1``
+Release: ``9.1.0.rc1``
 
 
 Amazon integration (including `Amazon Web Services (AWS) <https://aws.amazon.com/>`__).
@@ -137,7 +136,7 @@ This is a provider package for ``amazon`` provider. All classes for this provide
 are in ``airflow.providers.amazon`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-amazon/9.0.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-amazon/9.1.0/>`_.
 
 Installation
 ------------
@@ -155,7 +154,7 @@ Requirements
 PIP package                                 Version required
 =========================================== ======================
 ``apache-airflow``                          ``>=2.8.0``
-``apache-airflow-providers-common-compat`` ``>=1.1.0``
+``apache-airflow-providers-common-compat`` ``>=1.2.1``
 ``apache-airflow-providers-common-sql``     ``>=1.3.1``
 ``apache-airflow-providers-http``
 ``boto3``                                   ``>=1.34.90``
@@ -164,7 +163,6 @@ PIP package Version required
 ``watchtower``                              ``>=3.0.0,!=3.3.0,<4``
 ``jsonpath_ng``                             ``>=1.5.3``
 ``redshift_connector``                      ``>=2.0.918``
-``sqlalchemy_redshift``                     ``>=0.8.6``
 ``asgiref``                                 ``>=2.3.0``
 ``PyAthena``                                ``>=3.0.10``
 ``jmespath``                                ``>=0.7.0``
@@ -204,4 +202,4 @@ Dependent package
 ====================================================================================================================== ===================
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-amazon/9.0.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-amazon/9.1.0/changelog.html>`_.
{apache_airflow_providers_amazon-9.0.0rc1.dist-info → apache_airflow_providers_amazon-9.1.0rc1.dist-info}/RECORD
@@ -1,10 +1,10 @@
 airflow/providers/amazon/LICENSE,sha256=FFb4jd2AXnOOf7XLP04pQW6jbdhG49TxlGY6fFpCV1Y,13609
-airflow/providers/amazon/__init__.py,sha256=
-airflow/providers/amazon/get_provider_info.py,sha256=
+airflow/providers/amazon/__init__.py,sha256=MuTYiz17zqZxTSbZ537dOIWjnfLeKguUmhr0_326bro,1493
+airflow/providers/amazon/get_provider_info.py,sha256=JnyQK8t0-7kcB2ed5tBqkxh0PtzYoFpe1yYseO_dU6I,68960
 airflow/providers/amazon/aws/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/amazon/aws/exceptions.py,sha256=uRGNMgXvgdzfphpOTiyj74lQhjzb70J-X8n6fsx5Jog,1864
 airflow/providers/amazon/aws/assets/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/amazon/aws/assets/s3.py,sha256=
+airflow/providers/amazon/aws/assets/s3.py,sha256=wNaJiOM90-SCauD4EQneZVXMO54yDRjLPfI8D5o0-fw,1861
 airflow/providers/amazon/aws/auth_manager/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/amazon/aws/auth_manager/aws_auth_manager.py,sha256=0E38_x_EjV6uwMqkqe0vfmLGGCpcQKEAvnr7FPQwXTY,16625
 airflow/providers/amazon/aws/auth_manager/constants.py,sha256=Jdluo42InhyNGkYHB_dRtoFMpKanJLJdH0hyR9-5AZg,1050
@@ -38,7 +38,7 @@ airflow/providers/amazon/aws/executors/utils/exponential_backoff_retry.py,sha256
 airflow/providers/amazon/aws/fs/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/amazon/aws/fs/s3.py,sha256=Ty9XT9c1XArkUYcQkalvNZhuoTlEg3uKy-AIzNW9LgY,4797
 airflow/providers/amazon/aws/hooks/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/amazon/aws/hooks/appflow.py,sha256
+airflow/providers/amazon/aws/hooks/appflow.py,sha256=v7o6BgFDm8McE3JtB0oMkc80_nTP0e_u_uEDWFa0sVI,5367
 airflow/providers/amazon/aws/hooks/athena.py,sha256=tnck2Ts9QFVDU5MsY4_9mTULyMO9QHyMm_KTJPtCsk0,13001
 airflow/providers/amazon/aws/hooks/athena_sql.py,sha256=vFIUbMMTem3xvYAUTvW3h1ypjpKVLNck3VbrAlupVLA,6844
 airflow/providers/amazon/aws/hooks/base_aws.py,sha256=LDxXMKjlYFEhsZ0u0DsIjf7qt-Wr6aRn9NpfoNJc0nc,43630
@@ -74,9 +74,9 @@ airflow/providers/amazon/aws/hooks/quicksight.py,sha256=2Am_K-BcoqcfuWwLbWjW1Lsb
 airflow/providers/amazon/aws/hooks/rds.py,sha256=h7NF3GZ42RKeh70rlg2BQFVpa8vNadS37slj0MsAT3w,15211
 airflow/providers/amazon/aws/hooks/redshift_cluster.py,sha256=ywJxbcOy91-oGGkApo6_nRKucyaEhxXqEXikG7q9uZ4,7977
 airflow/providers/amazon/aws/hooks/redshift_data.py,sha256=5Kz7tsTbDwDDAqTud1--vyi74IksfHWBouIPRXYYFJk,11812
-airflow/providers/amazon/aws/hooks/redshift_sql.py,sha256=
-airflow/providers/amazon/aws/hooks/s3.py,sha256=
-airflow/providers/amazon/aws/hooks/sagemaker.py,sha256=
+airflow/providers/amazon/aws/hooks/redshift_sql.py,sha256=UTH2cyCfdGp5o2l94le5HLCF82KpsoGgRAgd7jrmeVw,11301
+airflow/providers/amazon/aws/hooks/s3.py,sha256=ChlYVLix_fQcZMl1-rpOx7ZxOUaxFGfGxhMPAiA-_lQ,61506
+airflow/providers/amazon/aws/hooks/sagemaker.py,sha256=w3_zqbuNznP47TBLioHuBenNv82blDul-rppyZMRaKg,60461
 airflow/providers/amazon/aws/hooks/secrets_manager.py,sha256=6srh3jUeSGoqyrSj1M6aSOaA9xT5kna0VGUC0kzH-q0,2690
 airflow/providers/amazon/aws/hooks/ses.py,sha256=uOTjyhb87jNyf2B11zH1wg5Oomnsx0nM4aHteP-mCHs,4147
 airflow/providers/amazon/aws/hooks/sns.py,sha256=VNbhqXZv-tw9IqWhgrAsJPgT9mm9LJ4L3EJIfp3c97w,3436
@@ -125,7 +125,7 @@ airflow/providers/amazon/aws/operators/neptune.py,sha256=on5oNX5K4yHfW1POE0eeZuj
 airflow/providers/amazon/aws/operators/quicksight.py,sha256=jc3Eof19UfLt5IqbQswRzaHaK8h0ACLY99i_1Prtq10,4089
 airflow/providers/amazon/aws/operators/rds.py,sha256=U2YLPx5MZCdDrLIyy-9K93W5aUtQ2VHUm6Tl0QMDomo,38396
 airflow/providers/amazon/aws/operators/redshift_cluster.py,sha256=rmBHCssxrYEJ8EnENY-AnzC004lbtHvxXHpy69sHtV0,36681
-airflow/providers/amazon/aws/operators/redshift_data.py,sha256=
+airflow/providers/amazon/aws/operators/redshift_data.py,sha256=36MVojiezDyGZ_4aQuY8xvs9doQlz_SWpJEp6Kwkw0U,10832
 airflow/providers/amazon/aws/operators/s3.py,sha256=d_K2DDNXEXkoi-WZ02-bwCf244Ogiw1PBaHcbsX-8Sg,36272
 airflow/providers/amazon/aws/operators/sagemaker.py,sha256=nyGS6uLP3eUYPCwOXDhdlucSGvI2lrSV8PUJ1_1f_5w,82337
 airflow/providers/amazon/aws/operators/sns.py,sha256=Rttd015UhLo4pCplGybxtLhflyu_26IFzYP7WTmQFk8,3730
@@ -158,7 +158,7 @@ airflow/providers/amazon/aws/sensors/quicksight.py,sha256=ow5CqGVcqH5h93EuJe71WN
 airflow/providers/amazon/aws/sensors/rds.py,sha256=AB2dH7fLwAaQogj0NYRrOOftfeOk_INetsyVHr1_qfM,6476
 airflow/providers/amazon/aws/sensors/redshift_cluster.py,sha256=m2z8vU_OfJds-hLecaebrtO6DIsseqJ-MHr7NfoJm0k,4021
 airflow/providers/amazon/aws/sensors/s3.py,sha256=Fp8LnFGbAaXV_JOgvIyN_KLs5uNiV86j-YL2_oG3GMI,17306
-airflow/providers/amazon/aws/sensors/sagemaker.py,sha256=
+airflow/providers/amazon/aws/sensors/sagemaker.py,sha256=YfXSdpTVfIDW0Wrbe13wmnuyc4xiWcTW6EQ7KedscV4,13634
 airflow/providers/amazon/aws/sensors/sqs.py,sha256=m3uPnuLZm8XB8_wd6cGmzKnaRqi67ZSJFeonEWCRDmw,10569
 airflow/providers/amazon/aws/sensors/step_function.py,sha256=RqyiAelyPearI6wvkauDwfhGcAISNXvGVzwAzF1C02E,3569
 airflow/providers/amazon/aws/transfers/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
@@ -175,10 +175,10 @@ airflow/providers/amazon/aws/transfers/http_to_s3.py,sha256=J1HjIdGsd4Zl8kk-RJAX
 airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py,sha256=xYJ94xNDsadluJpUUv1fURLW7YhSgL-9GaebZ6l4RLU,4536
 airflow/providers/amazon/aws/transfers/local_to_s3.py,sha256=yp9m7aZuL6YgzYRsFcyZ1wcGTXZTMO0F0CuBfkH1eGo,4165
 airflow/providers/amazon/aws/transfers/mongo_to_s3.py,sha256=OU7Cge_0WQd7xEb38V-0hjSHbjZRCQ7Ay4xntcG9R28,6020
-airflow/providers/amazon/aws/transfers/redshift_to_s3.py,sha256
+airflow/providers/amazon/aws/transfers/redshift_to_s3.py,sha256=-XMmYoHmQiZqZ5rqdYp4ZZbUNUe-Vu1z8TrkH9pxNHA,13001
 airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py,sha256=BRYID2nCmjooVH4WVD7tcy5MnkGQPK4c9BwioeCsIsw,11658
 airflow/providers/amazon/aws/transfers/s3_to_ftp.py,sha256=cxyNRW_FJQNNluuYr5fVluGLYnNRUvN75iHSSEHrVnY,2966
-airflow/providers/amazon/aws/transfers/s3_to_redshift.py,sha256=
+airflow/providers/amazon/aws/transfers/s3_to_redshift.py,sha256=GXMJV-_7Vy0CvPBOgSGWw_L2xf25XqRF9ztDFdMpWLc,11209
 airflow/providers/amazon/aws/transfers/s3_to_sftp.py,sha256=bgHgKv7o8ueC_zkhzW5k2xZpFnMlBHMcDf0t4sQ7kHY,3488
 airflow/providers/amazon/aws/transfers/s3_to_sql.py,sha256=kUuHDDR2ATDBHoRVq_3DOXGe8MVH9gcLHDMLMtFe4GI,4949
 airflow/providers/amazon/aws/transfers/salesforce_to_s3.py,sha256=yt77guCyYqVwDLdpmLb_psAI8PpGvbCjKQO6pz1J564,5686
@@ -192,7 +192,7 @@ airflow/providers/amazon/aws/triggers/batch.py,sha256=GogZnPaSc1ms55_aNcSDtV4wIZ
 airflow/providers/amazon/aws/triggers/bedrock.py,sha256=IiKyl0UUax-ex4siLjZpQGDZWyAOOhvJ-9USyRi_r3c,7260
 airflow/providers/amazon/aws/triggers/comprehend.py,sha256=atK02t-G6e-Rgd-a-IHc4n-wGZ3oC4pKueOwNeaLCrI,4063
 airflow/providers/amazon/aws/triggers/ec2.py,sha256=gMY3EP4TmL6SodLw12FNSLttlHd7hRhOu-q3CiG7y2w,3245
-airflow/providers/amazon/aws/triggers/ecs.py,sha256=
+airflow/providers/amazon/aws/triggers/ecs.py,sha256=YXFXUpQ1ejvtMIwkiet4LTLdJSkG8nr_ZEUI5lpqRGA,9222
 airflow/providers/amazon/aws/triggers/eks.py,sha256=9elEgIErRQRndk4sgPZ2F9bjcaWsUoUG18Qv758ly1U,16151
 airflow/providers/amazon/aws/triggers/emr.py,sha256=og83L9BPUcqqVXHwfUTD9cA-276YDo3Fnc0e2svQfqE,16566
 airflow/providers/amazon/aws/triggers/glue.py,sha256=hv_nLzBRPG13MetjEfU_-KuTphLE-xyF6yW4uQJQuBc,9480
@@ -210,7 +210,6 @@ airflow/providers/amazon/aws/triggers/sagemaker.py,sha256=0h_JlleQwUYq0JY2QST681
 airflow/providers/amazon/aws/triggers/sqs.py,sha256=tVA1i8XzV5AqbVQAdWrgrLKoZF8ewqgEwV7ggk1hrQM,8257
 airflow/providers/amazon/aws/triggers/step_function.py,sha256=M1HGdrnxL_T9KSCBNy2t531xMNJaFc-Y792T9cSmLGM,2685
 airflow/providers/amazon/aws/utils/__init__.py,sha256=yUkoHb2LuqSyHvj-HAhc2r2s04Kv_PhdyLMq52KarO8,3878
-airflow/providers/amazon/aws/utils/asset_compat_lineage_collector.py,sha256=7W1T15iz-h5cf0wpW-oMroeIF882PGwqJck3elRJOIs,3606
 airflow/providers/amazon/aws/utils/connection_wrapper.py,sha256=sPS-h_DK9SL2koHTaYwGoaC13Sr6NAgQXRLmvkvgXZY,16122
 airflow/providers/amazon/aws/utils/eks_get_token.py,sha256=q4utFF2c02T2Lm6KIZLABOiXJeglVZKCOxq6gn14dsk,2342
 airflow/providers/amazon/aws/utils/emailer.py,sha256=y-bzg1BZzOQ8J9-ed-74LY3VMv6LrLfBDtw5S4t3Tv4,1855
@@ -250,7 +249,7 @@ airflow/providers/amazon/aws/waiters/rds.json,sha256=HNmNQm5J-VaFHzjWb1pE5P7-Ix-
 airflow/providers/amazon/aws/waiters/redshift.json,sha256=jOBotCgbkko1b_CHcGEbhhRvusgt0YSzVuFiZrqVP30,1742
 airflow/providers/amazon/aws/waiters/sagemaker.json,sha256=JPHuQtUFZ1B7EMLfVmCRevNZ9jgpB71LM0dva8ZEO9A,5254
 airflow/providers/amazon/aws/waiters/stepfunctions.json,sha256=GsOH-emGerKGBAUFmI5lpMfNGH4c0ol_PSiea25DCEY,1033
-apache_airflow_providers_amazon-9.0.0rc1.dist-info/entry_points.txt,sha256=
-apache_airflow_providers_amazon-9.0.0rc1.dist-info/WHEEL,sha256=
-apache_airflow_providers_amazon-9.0.0rc1.dist-info/METADATA,sha256=
-apache_airflow_providers_amazon-9.0.0rc1.dist-info/RECORD,,
+apache_airflow_providers_amazon-9.1.0rc1.dist-info/entry_points.txt,sha256=vlc0ZzhBkMrav1maTRofgksnAw4SwoQLFX9cmnTgktk,102
+apache_airflow_providers_amazon-9.1.0rc1.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
+apache_airflow_providers_amazon-9.1.0rc1.dist-info/METADATA,sha256=qBjKdJd9WgtPT5L-V5k2D8IBorkkQxcQwRTKdnEXuEg,10687
+apache_airflow_providers_amazon-9.1.0rc1.dist-info/RECORD,,
airflow/providers/amazon/aws/utils/asset_compat_lineage_collector.py (deleted)
@@ -1,106 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-from __future__ import annotations
-
-from importlib.util import find_spec
-
-
-def _get_asset_compat_hook_lineage_collector():
-    from airflow.lineage.hook import get_hook_lineage_collector
-
-    collector = get_hook_lineage_collector()
-
-    if all(
-        getattr(collector, asset_method_name, None)
-        for asset_method_name in ("add_input_asset", "add_output_asset", "collected_assets")
-    ):
-        return collector
-
-    # dataset is renamed as asset in Airflow 3.0
-
-    from functools import wraps
-
-    from airflow.lineage.hook import DatasetLineageInfo, HookLineage
-
-    DatasetLineageInfo.asset = DatasetLineageInfo.dataset
-
-    def rename_dataset_kwargs_as_assets_kwargs(function):
-        @wraps(function)
-        def wrapper(*args, **kwargs):
-            if "asset_kwargs" in kwargs:
-                kwargs["dataset_kwargs"] = kwargs.pop("asset_kwargs")
-
-            if "asset_extra" in kwargs:
-                kwargs["dataset_extra"] = kwargs.pop("asset_extra")
-
-            return function(*args, **kwargs)
-
-        return wrapper
-
-    collector.create_asset = rename_dataset_kwargs_as_assets_kwargs(collector.create_dataset)
-    collector.add_input_asset = rename_dataset_kwargs_as_assets_kwargs(collector.add_input_dataset)
-    collector.add_output_asset = rename_dataset_kwargs_as_assets_kwargs(collector.add_output_dataset)
-
-    def collected_assets_compat(collector) -> HookLineage:
-        """Get the collected hook lineage information."""
-        lineage = collector.collected_datasets
-        return HookLineage(
-            [
-                DatasetLineageInfo(dataset=item.dataset, count=item.count, context=item.context)
-                for item in lineage.inputs
-            ],
-            [
-                DatasetLineageInfo(dataset=item.dataset, count=item.count, context=item.context)
-                for item in lineage.outputs
-            ],
-        )
-
-    setattr(
-        collector.__class__,
-        "collected_assets",
-        property(lambda collector: collected_assets_compat(collector)),
-    )
-
-    return collector
-
-
-def get_hook_lineage_collector():
-    # HookLineageCollector added in 2.10
-    try:
-        if find_spec("airflow.assets"):
-            # Dataset has been renamed as Asset in 3.0
-            from airflow.lineage.hook import get_hook_lineage_collector
-
-            return get_hook_lineage_collector()
-
-        return _get_asset_compat_hook_lineage_collector()
-    except ImportError:
-
-        class NoOpCollector:
-            """
-            NoOpCollector is a hook lineage collector that does nothing.
-
-            It is used when you want to disable lineage collection.
-            """
-
-            def add_input_asset(self, *_, **__):
-                pass
-
-            def add_output_asset(self, *_, **__):
-                pass
-
-        return NoOpCollector()
{apache_airflow_providers_amazon-9.0.0rc1.dist-info → apache_airflow_providers_amazon-9.1.0rc1.dist-info}/WHEEL
{apache_airflow_providers_amazon-9.0.0rc1.dist-info → apache_airflow_providers_amazon-9.1.0rc1.dist-info}/entry_points.txt
Files without changes