apache-airflow-providers-amazon 8.4.0__py3-none-any.whl → 8.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. airflow/providers/amazon/__init__.py +1 -1
  2. airflow/providers/amazon/aws/hooks/eventbridge.py +64 -1
  3. airflow/providers/amazon/aws/hooks/s3.py +2 -2
  4. airflow/providers/amazon/aws/operators/batch.py +2 -2
  5. airflow/providers/amazon/aws/operators/eventbridge.py +85 -0
  6. airflow/providers/amazon/aws/operators/rds.py +8 -1
  7. airflow/providers/amazon/aws/operators/sagemaker.py +141 -15
  8. airflow/providers/amazon/aws/sensors/sqs.py +44 -50
  9. airflow/providers/amazon/aws/transfers/gcs_to_s3.py +48 -21
  10. airflow/providers/amazon/aws/triggers/sqs.py +168 -0
  11. airflow/providers/amazon/aws/utils/sqs.py +90 -0
  12. airflow/providers/amazon/get_provider_info.py +5 -0
  13. {apache_airflow_providers_amazon-8.4.0.dist-info → apache_airflow_providers_amazon-8.5.0rc1.dist-info}/METADATA +11 -8
  14. {apache_airflow_providers_amazon-8.4.0.dist-info → apache_airflow_providers_amazon-8.5.0rc1.dist-info}/RECORD +19 -17
  15. {apache_airflow_providers_amazon-8.4.0.dist-info → apache_airflow_providers_amazon-8.5.0rc1.dist-info}/LICENSE +0 -0
  16. {apache_airflow_providers_amazon-8.4.0.dist-info → apache_airflow_providers_amazon-8.5.0rc1.dist-info}/NOTICE +0 -0
  17. {apache_airflow_providers_amazon-8.4.0.dist-info → apache_airflow_providers_amazon-8.5.0rc1.dist-info}/WHEEL +0 -0
  18. {apache_airflow_providers_amazon-8.4.0.dist-info → apache_airflow_providers_amazon-8.5.0rc1.dist-info}/entry_points.txt +0 -0
  19. {apache_airflow_providers_amazon-8.4.0.dist-info → apache_airflow_providers_amazon-8.5.0rc1.dist-info}/top_level.txt +0 -0
@@ -28,7 +28,7 @@ import packaging.version
 
 __all__ = ["__version__"]
 
-__version__ = "8.4.0"
+__version__ = "8.5.0"
 
 try:
     from airflow import __version__ as airflow_version
@@ -16,12 +16,75 @@
 # under the License.
 from __future__ import annotations
 
+import json
+
 from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
+from airflow.providers.amazon.aws.utils import trim_none_values
+
+
+def _validate_json(pattern: str) -> None:
+    try:
+        json.loads(pattern)
+    except ValueError:
+        raise ValueError("`event_pattern` must be a valid JSON string.")
 
 
 class EventBridgeHook(AwsBaseHook):
     """Amazon EventBridge Hook."""
 
     def __init__(self, *args, **kwargs):
-        """Creating object."""
         super().__init__(client_type="events", *args, **kwargs)
+
+    def put_rule(
+        self,
+        name: str,
+        description: str | None = None,
+        event_bus_name: str | None = None,
+        event_pattern: str | None = None,
+        role_arn: str | None = None,
+        schedule_expression: str | None = None,
+        state: str | None = None,
+        tags: list[dict] | None = None,
+        **kwargs,
+    ):
+        """
+        Create or update an EventBridge rule.
+
+        :param name: name of the rule to create or update (required)
+        :param description: description of the rule
+        :param event_bus_name: name or ARN of the event bus to associate with this rule
+        :param event_pattern: pattern of events to be matched to this rule
+        :param role_arn: the Amazon Resource Name of the IAM role associated with the rule
+        :param schedule_expression: the scheduling expression (for example, a cron or rate expression)
+        :param state: indicates whether rule is set to be "ENABLED" or "DISABLED"
+        :param tags: list of key-value pairs to associate with the rule
+
+        """
+        if not (event_pattern or schedule_expression):
+            raise ValueError(
+                "One of `event_pattern` or `schedule_expression` are required in order to "
+                "put or update your rule."
+            )
+
+        if state and state not in ["ENABLED", "DISABLED"]:
+            raise ValueError("`state` must be specified as ENABLED or DISABLED.")
+
+        if event_pattern:
+            _validate_json(event_pattern)
+
+        put_rule_kwargs: dict[str, str | list] = {
+            **trim_none_values(
+                {
+                    "Name": name,
+                    "Description": description,
+                    "EventBusName": event_bus_name,
+                    "EventPattern": event_pattern,
+                    "RoleArn": role_arn,
+                    "ScheduleExpression": schedule_expression,
+                    "State": state,
+                    "Tags": tags,
+                }
+            )
+        }
+
+        return self.conn.put_rule(**put_rule_kwargs)
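Usage sketch (illustrative, not part of the diff): calling the new put_rule hook method, assuming an existing "aws_default" connection and "us-east-1" as the target region.

    from airflow.providers.amazon.aws.hooks.eventbridge import EventBridgeHook

    hook = EventBridgeHook(aws_conn_id="aws_default", region_name="us-east-1")  # assumed connection/region
    # At least one of event_pattern or schedule_expression is required; state must be ENABLED or DISABLED.
    hook.put_rule(
        name="example_rule",  # hypothetical rule name
        event_pattern='{"source": ["example.myapp"]}',
        state="ENABLED",
    )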
@@ -57,7 +57,7 @@ from airflow.providers.amazon.aws.utils.tags import format_tags
 from airflow.utils.helpers import chunks
 
 if TYPE_CHECKING:
-    from mypy_boto3_s3.service_resource import Object as S3ResourceObject
+    from mypy_boto3_s3.service_resource import Bucket as S3Bucket, Object as S3ResourceObject
 
 T = TypeVar("T", bound=Callable)
 
@@ -298,7 +298,7 @@ class S3Hook(AwsBaseHook):
         return False
 
     @provide_bucket_name
-    def get_bucket(self, bucket_name: str | None = None) -> object:
+    def get_bucket(self, bucket_name: str | None = None) -> S3Bucket:
         """
         Returns a :py:class:`S3.Bucket` object.
 
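The tightened annotation means get_bucket is now typed as returning a boto3 S3.Bucket resource rather than a bare object; a small sketch, with the connection id and bucket name as assumptions.

    from airflow.providers.amazon.aws.hooks.s3 import S3Hook

    s3_hook = S3Hook(aws_conn_id="aws_default")  # assumed connection id
    bucket = s3_hook.get_bucket("example-bucket")  # typed as S3.Bucket as of this release
    for obj in bucket.objects.filter(Prefix="incoming/"):  # standard boto3 Bucket resource API
        print(obj.key)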
@@ -149,7 +149,7 @@ class BatchOperator(BaseOperator):
         parameters: dict | None = None,
         job_id: str | None = None,
         waiters: Any | None = None,
-        max_retries: int | None = None,
+        max_retries: int = 4200,
         status_retries: int | None = None,
         aws_conn_id: str | None = None,
         region_name: str | None = None,
@@ -223,7 +223,7 @@ class BatchOperator(BaseOperator):
                 timeout=self.execution_timeout,
                 trigger=BatchJobTrigger(
                     job_id=self.job_id,
-                    waiter_max_attempts=self.max_retries or 10,
+                    waiter_max_attempts=self.max_retries,
                     aws_conn_id=self.aws_conn_id,
                     region_name=self.region_name,
                     waiter_delay=self.poll_interval,
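With this change max_retries is always an integer (4200 waiter attempts by default) instead of falling back to 10 attempts in deferrable mode; a sketch of overriding it, with the job identifiers as assumptions.

    from airflow.providers.amazon.aws.operators.batch import BatchOperator

    submit_job = BatchOperator(
        task_id="submit_job",  # hypothetical task id
        job_name="example-job",  # hypothetical Batch job name
        job_queue="example-queue",  # hypothetical job queue
        job_definition="example-job-definition",  # hypothetical job definition
        deferrable=True,
        poll_interval=30,
        max_retries=120,  # trigger stops waiting after 120 attempts (~1 hour at 30 s)
    )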
@@ -32,6 +32,10 @@ class EventBridgePutEventsOperator(BaseOperator):
     """
     Put Events onto Amazon EventBridge.
 
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:EventBridgePutEventsOperator`
+
     :param entries: the list of events to be put onto EventBridge, each event is a dict (required)
     :param endpoint_id: the URL subdomain of the endpoint
     :param aws_conn_id: the AWS connection to use
@@ -85,3 +89,84 @@ class EventBridgePutEventsOperator(BaseOperator):
 
         if self.do_xcom_push:
             return [e["EventId"] for e in response["Entries"]]
+
+
+class EventBridgePutRuleOperator(BaseOperator):
+    """
+    Create or update a specified EventBridge rule.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:EventBridgePutRuleOperator`
+
+    :param name: name of the rule to create or update (required)
+    :param description: description of the rule
+    :param event_bus_name: name or ARN of the event bus to associate with this rule
+    :param event_pattern: pattern of events to be matched to this rule
+    :param role_arn: the Amazon Resource Name of the IAM role associated with the rule
+    :param schedule_expression: the scheduling expression (for example, a cron or rate expression)
+    :param state: indicates whether rule is set to be "ENABLED" or "DISABLED"
+    :param tags: list of key-value pairs to associate with the rule
+    :param region: the region where rule is to be created or updated
+
+    """
+
+    template_fields: Sequence[str] = (
+        "aws_conn_id",
+        "name",
+        "description",
+        "event_bus_name",
+        "event_pattern",
+        "role_arn",
+        "schedule_expression",
+        "state",
+        "tags",
+        "region_name",
+    )
+
+    def __init__(
+        self,
+        *,
+        name: str,
+        description: str | None = None,
+        event_bus_name: str | None = None,
+        event_pattern: str | None = None,
+        role_arn: str | None = None,
+        schedule_expression: str | None = None,
+        state: str | None = None,
+        tags: list | None = None,
+        region_name: str | None = None,
+        aws_conn_id: str = "aws_default",
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.name = name
+        self.description = description
+        self.event_bus_name = event_bus_name
+        self.event_pattern = event_pattern
+        self.role_arn = role_arn
+        self.region_name = region_name
+        self.schedule_expression = schedule_expression
+        self.state = state
+        self.tags = tags
+        self.aws_conn_id = aws_conn_id
+
+    @cached_property
+    def hook(self) -> EventBridgeHook:
+        """Create and return an EventBridgeHook."""
+        return EventBridgeHook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)
+
+    def execute(self, context: Context):
+
+        self.log.info('Sending rule "%s" to EventBridge.', self.name)
+
+        return self.hook.put_rule(
+            name=self.name,
+            description=self.description,
+            event_bus_name=self.event_bus_name,
+            event_pattern=self.event_pattern,
+            role_arn=self.role_arn,
+            schedule_expression=self.schedule_expression,
+            state=self.state,
+            tags=self.tags,
+        )
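Usage sketch (illustrative): the new EventBridgePutRuleOperator inside a DAG; the task id, rule name, and pattern are hypothetical. With do_xcom_push left enabled, the PutRule API response (including the rule ARN) becomes the task's return value in XCom.

    from airflow.providers.amazon.aws.operators.eventbridge import EventBridgePutRuleOperator

    put_rule = EventBridgePutRuleOperator(
        task_id="put_rule",  # hypothetical task id
        name="example_rule",  # hypothetical rule name
        event_pattern='{"source": ["example.myapp"]}',
        state="ENABLED",
        aws_conn_id="aws_default",
    )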
@@ -392,6 +392,8 @@ class RdsCancelExportTaskOperator(RdsBaseOperator):
 
     :param export_task_identifier: The identifier of the snapshot export task to cancel
     :param wait_for_completion: If True, waits for DB snapshot export to cancel. (default: True)
+    :param check_interval: The amount of time in seconds to wait between attempts
+    :param max_attempts: The maximum number of attempts to be made
     """
 
     template_fields = ("export_task_identifier",)
@@ -402,6 +404,7 @@ class RdsCancelExportTaskOperator(RdsBaseOperator):
         export_task_identifier: str,
         wait_for_completion: bool = True,
         check_interval: int = 30,
+        max_attempts: int = 40,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -409,6 +412,7 @@ class RdsCancelExportTaskOperator(RdsBaseOperator):
         self.export_task_identifier = export_task_identifier
         self.wait_for_completion = wait_for_completion
        self.check_interval = check_interval
+        self.max_attempts = max_attempts
 
     def execute(self, context: Context) -> str:
         self.log.info("Canceling export task %s", self.export_task_identifier)
@@ -419,7 +423,10 @@ class RdsCancelExportTaskOperator(RdsBaseOperator):
 
         if self.wait_for_completion:
             self.hook.wait_for_export_task_state(
-                self.export_task_identifier, target_state="canceled", check_interval=self.check_interval
+                self.export_task_identifier,
+                target_state="canceled",
+                check_interval=self.check_interval,
+                max_attempts=self.max_attempts,
             )
         return json.dumps(cancel_export, default=str)
 
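Usage sketch (illustrative): the cancel operator with the new max_attempts bound; together with check_interval it caps how long the task waits for the export to reach the "canceled" state. The identifiers below are assumptions.

    from airflow.providers.amazon.aws.operators.rds import RdsCancelExportTaskOperator

    cancel_export = RdsCancelExportTaskOperator(
        task_id="cancel_export",  # hypothetical task id
        export_task_identifier="example-export-task",  # hypothetical export task
        wait_for_completion=True,
        check_interval=30,  # seconds between state checks
        max_attempts=40,  # new in 8.5.0: stop waiting after 40 checks (~20 minutes here)
    )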
@@ -40,13 +40,16 @@ from airflow.providers.amazon.aws.utils.tags import format_tags
 from airflow.utils.json import AirflowJsonEncoder
 
 if TYPE_CHECKING:
+    from openlineage.client.run import Dataset
+
+    from airflow.providers.openlineage.extractors.base import OperatorLineage
     from airflow.utils.context import Context
 
 DEFAULT_CONN_ID: str = "aws_default"
 CHECK_INTERVAL_SECOND: int = 30
 
 
-def serialize(result: dict) -> str:
+def serialize(result: dict) -> dict:
     return json.loads(json.dumps(result, cls=AirflowJsonEncoder))
 
 
@@ -158,6 +161,14 @@ class SageMakerBaseOperator(BaseOperator):
         """Return SageMakerHook."""
         return SageMakerHook(aws_conn_id=self.aws_conn_id)
 
+    @staticmethod
+    def path_to_s3_dataset(path) -> Dataset:
+        from openlineage.client.run import Dataset
+
+        path = path.replace("s3://", "")
+        split_path = path.split("/")
+        return Dataset(namespace=f"s3://{split_path[0]}", name="/".join(split_path[1:]), facets={})
+
 
 class SageMakerProcessingOperator(SageMakerBaseOperator):
     """
@@ -225,6 +236,7 @@ class SageMakerProcessingOperator(SageMakerBaseOperator):
         self.max_attempts = max_attempts or 60
         self.max_ingestion_time = max_ingestion_time
         self.deferrable = deferrable
+        self.serialized_job: dict
 
     def _create_integer_fields(self) -> None:
         """Set fields which should be cast to integers."""
@@ -282,14 +294,48 @@ class SageMakerProcessingOperator(SageMakerBaseOperator):
                 method_name="execute_complete",
             )
 
-        return {"Processing": serialize(self.hook.describe_processing_job(self.config["ProcessingJobName"]))}
+        self.serialized_job = serialize(self.hook.describe_processing_job(self.config["ProcessingJobName"]))
+        return {"Processing": self.serialized_job}
 
     def execute_complete(self, context, event=None):
         if event["status"] != "success":
             raise AirflowException(f"Error while running job: {event}")
         else:
             self.log.info(event["message"])
-        return {"Processing": serialize(self.hook.describe_processing_job(self.config["ProcessingJobName"]))}
+        self.serialized_job = serialize(self.hook.describe_processing_job(self.config["ProcessingJobName"]))
+        return {"Processing": self.serialized_job}
+
+    def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage:
+        """Returns OpenLineage data gathered from SageMaker's API response saved by processing job."""
+        from airflow.providers.openlineage.extractors.base import OperatorLineage
+
+        inputs = []
+        outputs = []
+        try:
+            inputs, outputs = self._extract_s3_dataset_identifiers(
+                processing_inputs=self.serialized_job["ProcessingInputs"],
+                processing_outputs=self.serialized_job["ProcessingOutputConfig"]["Outputs"],
+            )
+        except KeyError:
+            self.log.exception("Could not find input/output information in Xcom.")
+
+        return OperatorLineage(inputs=inputs, outputs=outputs)
+
+    def _extract_s3_dataset_identifiers(self, processing_inputs, processing_outputs):
+        inputs = []
+        outputs = []
+        try:
+            for processing_input in processing_inputs:
+                inputs.append(self.path_to_s3_dataset(processing_input["S3Input"]["S3Uri"]))
+        except KeyError:
+            self.log.exception("Cannot find S3 input details", exc_info=True)
+
+        try:
+            for processing_output in processing_outputs:
+                outputs.append(self.path_to_s3_dataset(processing_output["S3Output"]["S3Uri"]))
+        except KeyError:
+            self.log.exception("Cannot find S3 output details.", exc_info=True)
+        return inputs, outputs
 
 
 class SageMakerEndpointConfigOperator(SageMakerBaseOperator):
@@ -579,6 +625,8 @@ class SageMakerTransformOperator(SageMakerBaseOperator):
                 Provided value: '{action_if_job_exists}'."
             )
         self.deferrable = deferrable
+        self.serialized_model: dict
+        self.serialized_tranform: dict
 
     def _create_integer_fields(self) -> None:
         """Set fields which should be cast to integers."""
@@ -650,10 +698,11 @@ class SageMakerTransformOperator(SageMakerBaseOperator):
                 method_name="execute_complete",
             )
 
-        return {
-            "Model": serialize(self.hook.describe_model(transform_config["ModelName"])),
-            "Transform": serialize(self.hook.describe_transform_job(transform_config["TransformJobName"])),
-        }
+        self.serialized_model = serialize(self.hook.describe_model(transform_config["ModelName"]))
+        self.serialized_tranform = serialize(
+            self.hook.describe_transform_job(transform_config["TransformJobName"])
+        )
+        return {"Model": self.serialized_model, "Transform": self.serialized_tranform}
 
     def execute_complete(self, context, event=None):
         if event["status"] != "success":
@@ -661,10 +710,62 @@ class SageMakerTransformOperator(SageMakerBaseOperator):
         else:
             self.log.info(event["message"])
         transform_config = self.config.get("Transform", self.config)
-        return {
-            "Model": serialize(self.hook.describe_model(transform_config["ModelName"])),
-            "Transform": serialize(self.hook.describe_transform_job(transform_config["TransformJobName"])),
-        }
+        self.serialized_model = serialize(self.hook.describe_model(transform_config["ModelName"]))
+        self.serialized_tranform = serialize(
+            self.hook.describe_transform_job(transform_config["TransformJobName"])
+        )
+        return {"Model": self.serialized_model, "Transform": self.serialized_tranform}
+
+    def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage:
+        """Returns OpenLineage data gathered from SageMaker's API response saved by transform job."""
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        model_package_arn = None
+        transform_input = None
+        transform_output = None
+
+        try:
+            model_package_arn = self.serialized_model["PrimaryContainer"]["ModelPackageName"]
+        except KeyError:
+            self.log.error("Cannot find Model Package Name.", exc_info=True)
+
+        try:
+            transform_input = self.serialized_tranform["TransformInput"]["DataSource"]["S3DataSource"][
+                "S3Uri"
+            ]
+            transform_output = self.serialized_tranform["TransformOutput"]["S3OutputPath"]
+        except KeyError:
+            self.log.error("Cannot find some required input/output details.", exc_info=True)
+
+        inputs = []
+
+        if transform_input is not None:
+            inputs.append(self.path_to_s3_dataset(transform_input))
+
+        if model_package_arn is not None:
+            model_data_urls = self._get_model_data_urls(model_package_arn)
+            for model_data_url in model_data_urls:
+                inputs.append(self.path_to_s3_dataset(model_data_url))
+
+        outputs = []
+        if transform_output is not None:
+            outputs.append(self.path_to_s3_dataset(transform_output))
+
+        return OperatorLineage(inputs=inputs, outputs=outputs)
+
+    def _get_model_data_urls(self, model_package_arn) -> list:
+        model_data_urls = []
+        try:
+            model_containers = self.hook.get_conn().describe_model_package(
+                ModelPackageName=model_package_arn
+            )["InferenceSpecification"]["Containers"]
+
+            for container in model_containers:
+                model_data_urls.append(container["ModelDataUrl"])
+        except KeyError:
+            self.log.exception("Cannot retrieve model details.", exc_info=True)
+
+        return model_data_urls
 
 
 class SageMakerTuningOperator(SageMakerBaseOperator):
@@ -891,6 +992,7 @@ class SageMakerTrainingOperator(SageMakerBaseOperator):
                 Provided value: '{action_if_job_exists}'."
             )
         self.deferrable = deferrable
+        self.serialized_training_data: dict
 
     def expand_role(self) -> None:
         """Expands an IAM role name into an ARN."""
@@ -951,16 +1053,40 @@ class SageMakerTrainingOperator(SageMakerBaseOperator):
                 method_name="execute_complete",
             )
 
-        result = {"Training": serialize(self.hook.describe_training_job(self.config["TrainingJobName"]))}
-        return result
+        self.serialized_training_data = serialize(
+            self.hook.describe_training_job(self.config["TrainingJobName"])
+        )
+        return {"Training": self.serialized_training_data}
 
     def execute_complete(self, context, event=None):
         if event["status"] != "success":
             raise AirflowException(f"Error while running job: {event}")
         else:
             self.log.info(event["message"])
-        result = {"Training": serialize(self.hook.describe_training_job(self.config["TrainingJobName"]))}
-        return result
+        self.serialized_training_data = serialize(
+            self.hook.describe_training_job(self.config["TrainingJobName"])
+        )
+        return {"Training": self.serialized_training_data}
+
+    def get_openlineage_facets_on_complete(self, task_instance) -> OperatorLineage:
+        """Returns OpenLineage data gathered from SageMaker's API response saved by training job."""
+        from airflow.providers.openlineage.extractors import OperatorLineage
+
+        inputs = []
+        outputs = []
+        try:
+            for input_data in self.serialized_training_data["InputDataConfig"]:
+                inputs.append(self.path_to_s3_dataset(input_data["DataSource"]["S3DataSource"]["S3Uri"]))
+        except KeyError:
+            self.log.exception("Issues extracting inputs.")
+
+        try:
+            outputs.append(
+                self.path_to_s3_dataset(self.serialized_training_data["ModelArtifacts"]["S3ModelArtifacts"])
+            )
+        except KeyError:
+            self.log.exception("Issues extracting inputs.")
+        return OperatorLineage(inputs=inputs, outputs=outputs)
 
 
 class SageMakerDeleteModelOperator(SageMakerBaseOperator):
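The three new get_openlineage_facets_on_complete methods above all build their datasets through the path_to_s3_dataset helper; a small sketch of what it produces (requires the openlineage client package; bucket and key are illustrative).

    from airflow.providers.amazon.aws.operators.sagemaker import SageMakerBaseOperator

    dataset = SageMakerBaseOperator.path_to_s3_dataset("s3://example-bucket/prefix/data.csv")
    # dataset == Dataset(namespace="s3://example-bucket", name="prefix/data.csv", facets={})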
@@ -18,20 +18,22 @@
 """Reads and then deletes the message from SQS queue."""
 from __future__ import annotations
 
-import json
 from functools import cached_property
 from typing import TYPE_CHECKING, Any, Collection, Literal, Sequence
 
 from deprecated import deprecated
-from jsonpath_ng import parse
 
+from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.base_aws import BaseAwsConnection
 from airflow.providers.amazon.aws.hooks.sqs import SqsHook
+from airflow.providers.amazon.aws.triggers.sqs import SqsSensorTrigger
+from airflow.providers.amazon.aws.utils.sqs import process_response
 from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
+    from datetime import timedelta
 
 
 class SqsSensor(BaseSensorOperator):
@@ -70,6 +72,9 @@ class SqsSensor(BaseSensorOperator):
     :param delete_message_on_reception: Default to `True`, the messages are deleted from the queue
         as soon as being consumed. Otherwise, the messages remain in the queue after consumption and
         should be deleted manually.
+    :param deferrable: If True, the sensor will operate in deferrable mode. This mode requires aiobotocore
+        module to be installed.
+        (default: False, but can be overridden in config file by setting default_deferrable to True)
 
     """
 
@@ -88,6 +93,7 @@ class SqsSensor(BaseSensorOperator):
         message_filtering_match_values: Any = None,
         message_filtering_config: Any = None,
         delete_message_on_reception: bool = True,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -112,6 +118,34 @@ class SqsSensor(BaseSensorOperator):
             raise TypeError("message_filtering_match_values must be specified for literal matching")
 
         self.message_filtering_config = message_filtering_config
+        self.deferrable = deferrable
+
+    def execute(self, context: Context) -> Any:
+        if self.deferrable:
+            self.defer(
+                trigger=SqsSensorTrigger(
+                    sqs_queue=self.sqs_queue,
+                    aws_conn_id=self.aws_conn_id,
+                    max_messages=self.max_messages,
+                    num_batches=self.num_batches,
+                    wait_time_seconds=self.wait_time_seconds,
+                    visibility_timeout=self.visibility_timeout,
+                    message_filtering=self.message_filtering,
+                    message_filtering_match_values=self.message_filtering_match_values,
+                    message_filtering_config=self.message_filtering_config,
+                    delete_message_on_reception=self.delete_message_on_reception,
+                    waiter_delay=int(self.poke_interval),
+                ),
+                method_name="execute_complete",
+                timeout=timedelta(seconds=self.timeout),
+            )
+        else:
+            super().execute(context=context)
+
+    def execute_complete(self, context: Context, event: dict | None = None) -> None:
+        if event is None or event["status"] != "success":
+            raise AirflowException(f"Trigger error: event is {event}")
+        context["ti"].xcom_push(key="messages", value=event["message_batch"])
 
     def poll_sqs(self, sqs_conn: BaseAwsConnection) -> Collection:
         """
@@ -131,19 +165,7 @@ class SqsSensor(BaseSensorOperator):
             receive_message_kwargs["VisibilityTimeout"] = self.visibility_timeout
 
         response = sqs_conn.receive_message(**receive_message_kwargs)
-
-        if "Messages" not in response:
-            return []
-
-        messages = response["Messages"]
-        num_messages = len(messages)
-        self.log.info("Received %d messages", num_messages)
-
-        if num_messages and self.message_filtering:
-            messages = self.filter_messages(messages)
-            num_messages = len(messages)
-            self.log.info("There are %d messages left after filtering", num_messages)
-        return messages
+        return response
 
     def poke(self, context: Context):
         """
@@ -156,7 +178,13 @@ class SqsSensor(BaseSensorOperator):
 
         # perform multiple SQS call to retrieve messages in series
         for _ in range(self.num_batches):
-            messages = self.poll_sqs(sqs_conn=self.hook.conn)
+            response = self.poll_sqs(sqs_conn=self.hook.conn)
+            messages = process_response(
+                response,
+                self.message_filtering,
+                self.message_filtering_match_values,
+                self.message_filtering_config,
+            )
 
             if not len(messages):
                 continue
@@ -191,37 +219,3 @@ class SqsSensor(BaseSensorOperator):
     @cached_property
     def hook(self) -> SqsHook:
         return SqsHook(aws_conn_id=self.aws_conn_id)
-
-    def filter_messages(self, messages):
-        if self.message_filtering == "literal":
-            return self.filter_messages_literal(messages)
-        if self.message_filtering == "jsonpath":
-            return self.filter_messages_jsonpath(messages)
-        else:
-            raise NotImplementedError("Override this method to define custom filters")
-
-    def filter_messages_literal(self, messages):
-        filtered_messages = []
-        for message in messages:
-            if message["Body"] in self.message_filtering_match_values:
-                filtered_messages.append(message)
-        return filtered_messages
-
-    def filter_messages_jsonpath(self, messages):
-        jsonpath_expr = parse(self.message_filtering_config)
-        filtered_messages = []
-        for message in messages:
-            body = message["Body"]
-            # Body is a string, deserialize to an object and then parse
-            body = json.loads(body)
-            results = jsonpath_expr.find(body)
-            if not results:
-                continue
-            if self.message_filtering_match_values is None:
-                filtered_messages.append(message)
-                continue
-            for result in results:
-                if result.value in self.message_filtering_match_values:
-                    filtered_messages.append(message)
-                    break
-        return filtered_messages
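Usage sketch (illustrative): the sensor in the new deferrable mode; the queue URL is hypothetical and aiobotocore must be installed. Matched messages are still pushed to XCom under the "messages" key, mirroring the non-deferrable path.

    from airflow.providers.amazon.aws.sensors.sqs import SqsSensor

    wait_for_message = SqsSensor(
        task_id="wait_for_message",  # hypothetical task id
        sqs_queue="https://sqs.us-east-1.amazonaws.com/123456789012/example-queue",  # hypothetical queue
        max_messages=5,
        poke_interval=60,  # reused as the trigger's waiter_delay
        deferrable=True,  # new in 8.5.0: frees the worker slot while waiting
    )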
@@ -22,7 +22,9 @@ import os
 import warnings
 from typing import TYPE_CHECKING, Sequence
 
-from airflow.exceptions import AirflowProviderDeprecationWarning
+from packaging.version import Version
+
+from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
@@ -80,6 +82,8 @@ class GCSToS3Operator(BaseOperator):
         on the bucket is recreated within path passed in dest_s3_key.
     :param match_glob: (Optional) filters objects based on the glob pattern given by the string
         (e.g, ``'**/*/.json'``)
+    :param gcp_user_project: (Optional) The identifier of the Google Cloud project to bill for this request.
+        Required for Requester Pays buckets.
     """
 
     template_fields: Sequence[str] = (
@@ -88,6 +92,7 @@ class GCSToS3Operator(BaseOperator):
         "delimiter",
         "dest_s3_key",
         "google_impersonation_chain",
+        "gcp_user_project",
     )
     ui_color = "#f0eee4"
 
@@ -107,19 +112,13 @@ class GCSToS3Operator(BaseOperator):
         s3_acl_policy: str | None = None,
         keep_directory_structure: bool = True,
         match_glob: str | None = None,
+        gcp_user_project: str | None = None,
         **kwargs,
     ) -> None:
         super().__init__(**kwargs)
 
         self.bucket = bucket
         self.prefix = prefix
-        if delimiter:
-            warnings.warn(
-                "Usage of 'delimiter' is deprecated, please use 'match_glob' instead",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
-        self.delimiter = delimiter
         self.gcp_conn_id = gcp_conn_id
         self.dest_aws_conn_id = dest_aws_conn_id
         self.dest_s3_key = dest_s3_key
@@ -129,11 +128,33 @@ class GCSToS3Operator(BaseOperator):
         self.dest_s3_extra_args = dest_s3_extra_args or {}
         self.s3_acl_policy = s3_acl_policy
         self.keep_directory_structure = keep_directory_structure
+        try:
+            from airflow.providers.google import __version__
+
+            if Version(__version__) >= Version("10.3.0"):
+                self.__is_match_glob_supported = True
+            else:
+                self.__is_match_glob_supported = False
+        except ImportError:  # __version__ was added in 10.1.0, so this means it's < 10.3.0
+            self.__is_match_glob_supported = False
+        if self.__is_match_glob_supported:
+            if delimiter:
+                warnings.warn(
+                    "Usage of 'delimiter' is deprecated, please use 'match_glob' instead",
+                    AirflowProviderDeprecationWarning,
+                    stacklevel=2,
+                )
+        elif match_glob:
+            raise AirflowException(
+                "The 'match_glob' parameter requires 'apache-airflow-providers-google>=10.3.0'."
+            )
+        self.delimiter = delimiter
         self.match_glob = match_glob
+        self.gcp_user_project = gcp_user_project
 
     def execute(self, context: Context) -> list[str]:
         # list all files in an Google Cloud Storage bucket
-        hook = GCSHook(
+        gcs_hook = GCSHook(
             gcp_conn_id=self.gcp_conn_id,
             impersonation_chain=self.google_impersonation_chain,
         )
@@ -145,9 +166,16 @@ class GCSToS3Operator(BaseOperator):
             self.prefix,
         )
 
-        files = hook.list(
-            bucket_name=self.bucket, prefix=self.prefix, delimiter=self.delimiter, match_glob=self.match_glob
-        )
+        list_kwargs = {
+            "bucket_name": self.bucket,
+            "prefix": self.prefix,
+            "delimiter": self.delimiter,
+            "user_project": self.gcp_user_project,
+        }
+        if self.__is_match_glob_supported:
+            list_kwargs["match_glob"] = self.match_glob
+
+        gcs_files = gcs_hook.list(**list_kwargs)  # type: ignore
 
         s3_hook = S3Hook(
             aws_conn_id=self.dest_aws_conn_id, verify=self.dest_verify, extra_args=self.dest_s3_extra_args
@@ -173,24 +201,23 @@ class GCSToS3Operator(BaseOperator):
             existing_files = existing_files if existing_files is not None else []
             # remove the prefix for the existing files to allow the match
             existing_files = [file.replace(prefix, "", 1) for file in existing_files]
-            files = list(set(files) - set(existing_files))
-
-        if files:
+            gcs_files = list(set(gcs_files) - set(existing_files))
 
-            for file in files:
-                with hook.provide_file(object_name=file, bucket_name=self.bucket) as local_tmp_file:
+        if gcs_files:
+            for file in gcs_files:
+                with gcs_hook.provide_file(
+                    object_name=file, bucket_name=self.bucket, user_project=self.gcp_user_project
+                ) as local_tmp_file:
                     dest_key = os.path.join(self.dest_s3_key, file)
                     self.log.info("Saving file to %s", dest_key)
-
                     s3_hook.load_file(
                         filename=local_tmp_file.name,
                        key=dest_key,
                         replace=self.replace,
                         acl_policy=self.s3_acl_policy,
                     )
-
-            self.log.info("All done, uploaded %d files to S3", len(files))
+            self.log.info("All done, uploaded %d files to S3", len(gcs_files))
         else:
             self.log.info("In sync, no files needed to be uploaded to S3")
 
-        return files
+        return gcs_files
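Usage sketch (illustrative): the transfer with the new gcp_user_project parameter for Requester Pays buckets; the bucket, destination key, and project id below are hypothetical.

    from airflow.providers.amazon.aws.transfers.gcs_to_s3 import GCSToS3Operator

    gcs_to_s3 = GCSToS3Operator(
        task_id="gcs_to_s3",  # hypothetical task id
        bucket="example-requester-pays-bucket",  # hypothetical GCS source bucket
        dest_s3_key="s3://example-bucket/incoming/",  # hypothetical S3 destination
        gcp_user_project="example-billing-project",  # billed for the Requester Pays reads
    )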
@@ -0,0 +1,168 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import asyncio
+from typing import Any, AsyncIterator, Collection, Literal
+
+from airflow.exceptions import AirflowException
+from airflow.providers.amazon.aws.hooks.base_aws import BaseAwsConnection
+from airflow.providers.amazon.aws.hooks.sqs import SqsHook
+from airflow.providers.amazon.aws.utils.sqs import process_response
+from airflow.triggers.base import BaseTrigger, TriggerEvent
+
+
+class SqsSensorTrigger(BaseTrigger):
+    """
+    Asynchronously get messages from an Amazon SQS queue and then delete the messages from the queue.
+
+    :param sqs_queue: The SQS queue url
+    :param aws_conn_id: AWS connection id
+    :param max_messages: The maximum number of messages to retrieve for each poke (templated)
+    :param num_batches: The number of times the sensor will call the SQS API to receive messages (default: 1)
+    :param wait_time_seconds: The time in seconds to wait for receiving messages (default: 1 second)
+    :param visibility_timeout: Visibility timeout, a period of time during which
+        Amazon SQS prevents other consumers from receiving and processing the message.
+    :param message_filtering: Specified how received messages should be filtered. Supported options are:
+        `None` (no filtering, default), `'literal'` (message Body literal match) or `'jsonpath'`
+        (message Body filtered using a JSONPath expression).
+        You may add further methods by overriding the relevant class methods.
+    :param message_filtering_match_values: Optional value/s for the message filter to match on.
+        For example, with literal matching, if a message body matches any of the specified values
+        then it is included. For JSONPath matching, the result of the JSONPath expression is used
+        and may match any of the specified values.
+    :param message_filtering_config: Additional configuration to pass to the message filter.
+        For example with JSONPath filtering you can pass a JSONPath expression string here,
+        such as `'foo[*].baz'`. Messages with a Body which does not match are ignored.
+    :param delete_message_on_reception: Default to `True`, the messages are deleted from the queue
+        as soon as being consumed. Otherwise, the messages remain in the queue after consumption and
+        should be deleted manually.
+    :param waiter_delay: The time in seconds to wait between calls to the SQS API to receive messages.
+    """
+
+    def __init__(
+        self,
+        sqs_queue: str,
+        aws_conn_id: str = "aws_default",
+        max_messages: int = 5,
+        num_batches: int = 1,
+        wait_time_seconds: int = 1,
+        visibility_timeout: int | None = None,
+        message_filtering: Literal["literal", "jsonpath"] | None = None,
+        message_filtering_match_values: Any = None,
+        message_filtering_config: Any = None,
+        delete_message_on_reception: bool = True,
+        waiter_delay: int = 60,
+    ):
+        self.sqs_queue = sqs_queue
+        self.aws_conn_id = aws_conn_id
+        self.max_messages = max_messages
+        self.num_batches = num_batches
+        self.wait_time_seconds = wait_time_seconds
+        self.visibility_timeout = visibility_timeout
+        self.message_filtering = message_filtering
+        self.delete_message_on_reception = delete_message_on_reception
+        self.message_filtering_match_values = message_filtering_match_values
+        self.message_filtering_config = message_filtering_config
+        self.waiter_delay = waiter_delay
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        return (
+            self.__class__.__module__ + "." + self.__class__.__qualname__,
+            {
+                "sqs_queue": self.sqs_queue,
+                "aws_conn_id": self.aws_conn_id,
+                "max_messages": self.max_messages,
+                "num_batches": self.num_batches,
+                "wait_time_seconds": self.wait_time_seconds,
+                "visibility_timeout": self.visibility_timeout,
+                "message_filtering": self.message_filtering,
+                "delete_message_on_reception": self.delete_message_on_reception,
+                "message_filtering_match_values": self.message_filtering_match_values,
+                "message_filtering_config": self.message_filtering_config,
+                "waiter_delay": self.waiter_delay,
+            },
+        )
+
+    @property
+    def hook(self) -> SqsHook:
+        return SqsHook(aws_conn_id=self.aws_conn_id)
+
+    async def poll_sqs(self, client: BaseAwsConnection) -> Collection:
+        """
+        Asynchronously poll SQS queue to retrieve messages.
+
+        :param client: SQS connection
+        :return: A list of messages retrieved from SQS
+        """
+        self.log.info("SqsSensor checking for message on queue: %s", self.sqs_queue)
+
+        receive_message_kwargs = {
+            "QueueUrl": self.sqs_queue,
+            "MaxNumberOfMessages": self.max_messages,
+            "WaitTimeSeconds": self.wait_time_seconds,
+        }
+        if self.visibility_timeout is not None:
+            receive_message_kwargs["VisibilityTimeout"] = self.visibility_timeout
+
+        response = await client.receive_message(**receive_message_kwargs)
+        return response
+
+    async def poke(self, client: Any):
+        message_batch: list[Any] = []
+        for _ in range(self.num_batches):
+            self.log.info("starting call to poll sqs")
+            response = await self.poll_sqs(client=client)
+            messages = process_response(
+                response,
+                self.message_filtering,
+                self.message_filtering_match_values,
+                self.message_filtering_config,
+            )
+
+            if not messages:
+                continue
+
+            message_batch.extend(messages)
+
+            if self.delete_message_on_reception:
+                self.log.info("Deleting %d messages", len(messages))
+
+                entries = [
+                    {"Id": message["MessageId"], "ReceiptHandle": message["ReceiptHandle"]}
+                    for message in messages
+                ]
+                response = await client.delete_message_batch(QueueUrl=self.sqs_queue, Entries=entries)
+
+                if "Successful" not in response:
+                    raise AirflowException(
+                        f"Delete SQS Messages failed {str(response)} for messages {str(messages)}"
+                    )
+
+        return message_batch
+
+    async def run(self) -> AsyncIterator[TriggerEvent]:
+        while True:
+            # This loop will run indefinitely until the timeout, which is set in the self.defer
+            # method, is reached.
+            async with self.hook.async_conn as client:
+                result = await self.poke(client=client)
+                if result:
+                    yield TriggerEvent({"status": "success", "message_batch": result})
+                    break
+                else:
+                    await asyncio.sleep(self.waiter_delay)
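The sensor constructs this trigger automatically when deferrable=True, so direct use is mostly relevant in tests; a sketch of the serialize round-trip with a hypothetical queue URL.

    from airflow.providers.amazon.aws.triggers.sqs import SqsSensorTrigger

    trigger = SqsSensorTrigger(
        sqs_queue="https://sqs.us-east-1.amazonaws.com/123456789012/example-queue",  # hypothetical queue
        waiter_delay=30,
    )
    classpath, kwargs = trigger.serialize()
    # classpath == "airflow.providers.amazon.aws.triggers.sqs.SqsSensorTrigger"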
@@ -0,0 +1,90 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any, Literal
+
+from jsonpath_ng import parse
+
+log = logging.getLogger(__name__)
+
+
+def process_response(
+    response: Any,
+    message_filtering: Literal["literal", "jsonpath"] | None = None,
+    message_filtering_match_values: Any = None,
+    message_filtering_config: Any = None,
+) -> Any:
+    """
+    Process the response from SQS.
+
+    :param response: The response from SQS
+    :return: The processed response
+    """
+    if not isinstance(response, dict):
+        return []
+    elif "Messages" not in response:
+        return []
+
+    messages = response["Messages"]
+    num_messages = len(messages)
+    log.info("Received %d messages", num_messages)
+
+    if num_messages and message_filtering:
+        messages = filter_messages(
+            messages, message_filtering, message_filtering_match_values, message_filtering_config
+        )
+        num_messages = len(messages)
+        log.info("There are %d messages left after filtering", num_messages)
+    return messages
+
+
+def filter_messages(
+    messages, message_filtering, message_filtering_match_values, message_filtering_config
+) -> list[Any]:
+    if message_filtering == "literal":
+        return filter_messages_literal(messages, message_filtering_match_values)
+    if message_filtering == "jsonpath":
+        return filter_messages_jsonpath(messages, message_filtering_match_values, message_filtering_config)
+    else:
+        raise NotImplementedError("Override this method to define custom filters")
+
+
+def filter_messages_literal(messages, message_filtering_match_values) -> list[Any]:
+    return [message for message in messages if message["Body"] in message_filtering_match_values]
+
+
+def filter_messages_jsonpath(messages, message_filtering_match_values, message_filtering_config) -> list[Any]:
+    jsonpath_expr = parse(message_filtering_config)
+    filtered_messages = []
+    for message in messages:
+        body = message["Body"]
+        # Body is a string, deserialize to an object and then parse
+        body = json.loads(body)
+        results = jsonpath_expr.find(body)
+        if not results:
+            continue
+        if message_filtering_match_values is None:
+            filtered_messages.append(message)
+            continue
+        for result in results:
+            if result.value in message_filtering_match_values:
+                filtered_messages.append(message)
+                break
+    return filtered_messages
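Usage sketch (illustrative): the extracted process_response helper with literal filtering; the response dict below only mimics the shape of a boto3 receive_message reply, with made-up values.

    from airflow.providers.amazon.aws.utils.sqs import process_response

    response = {
        "Messages": [
            {"MessageId": "1", "ReceiptHandle": "rh-1", "Body": "payload-a"},
            {"MessageId": "2", "ReceiptHandle": "rh-2", "Body": "payload-b"},
        ]
    }
    matched = process_response(
        response,
        message_filtering="literal",
        message_filtering_match_values={"payload-a"},
    )
    # matched keeps only the first message, whose Body is "payload-a"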
@@ -29,6 +29,7 @@ def get_provider_info():
         "description": "Amazon integration (including `Amazon Web Services (AWS) <https://aws.amazon.com/>`__).\n",
         "suspended": False,
         "versions": [
+            "8.5.0",
             "8.4.0",
             "8.3.1",
             "8.3.0",
@@ -716,6 +717,10 @@ def get_provider_info():
                 "integration-name": "AWS Step Functions",
                 "python-modules": ["airflow.providers.amazon.aws.triggers.step_function"],
             },
+            {
+                "integration-name": "Amazon Simple Queue Service (SQS)",
+                "python-modules": ["airflow.providers.amazon.aws.triggers.sqs"],
+            },
         ],
         "transfers": [
             {
@@ -1,14 +1,14 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-amazon
-Version: 8.4.0
+Version: 8.5.0rc1
 Summary: Provider for Apache Airflow. Implements apache-airflow-providers-amazon package
 Home-page: https://airflow.apache.org/
 Download-URL: https://archive.apache.org/dist/airflow/providers
 Author: Apache Software Foundation
 Author-email: dev@airflow.apache.org
 License: Apache License 2.0
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-amazon/8.4.0/
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-amazon/8.4.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-amazon/8.5.0/
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-amazon/8.5.0/changelog.html
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
@@ -31,9 +31,9 @@ Requires-Python: ~=3.8
 Description-Content-Type: text/x-rst
 License-File: LICENSE
 License-File: NOTICE
-Requires-Dist: apache-airflow-providers-common-sql (>=1.3.1)
+Requires-Dist: apache-airflow-providers-common-sql (>=1.3.1.dev0)
 Requires-Dist: apache-airflow-providers-http
-Requires-Dist: apache-airflow (>=2.4.0)
+Requires-Dist: apache-airflow (>=2.4.0.dev0)
 Requires-Dist: asgiref
 Requires-Dist: boto3 (>=1.24.0)
 Requires-Dist: jsonpath-ng (>=1.5.3)
@@ -66,6 +66,8 @@ Provides-Extra: microsoft.azure
 Requires-Dist: apache-airflow-providers-microsoft-azure ; extra == 'microsoft.azure'
 Provides-Extra: mongo
 Requires-Dist: apache-airflow-providers-mongo ; extra == 'mongo'
+Provides-Extra: openlineage
+Requires-Dist: apache-airflow-providers-openlineage ; extra == 'openlineage'
 Provides-Extra: pandas
 Requires-Dist: pandas (>=0.17.1) ; extra == 'pandas'
 Provides-Extra: salesforce
@@ -111,7 +113,7 @@ Requires-Dist: apache-airflow-providers-ssh ; extra == 'ssh'
 
 Package ``apache-airflow-providers-amazon``
 
-Release: ``8.4.0``
+Release: ``8.5.0rc1``
 
 
 Amazon integration (including `Amazon Web Services (AWS) <https://aws.amazon.com/>`__).
@@ -124,7 +126,7 @@ This is a provider package for ``amazon`` provider. All classes for this provide
 are in ``airflow.providers.amazon`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-amazon/8.4.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-amazon/8.5.0/>`_.
 
 
 Installation
@@ -184,9 +186,10 @@ Dependent package
 `apache-airflow-providers-imap <https://airflow.apache.org/docs/apache-airflow-providers-imap>`_ ``imap``
 `apache-airflow-providers-microsoft-azure <https://airflow.apache.org/docs/apache-airflow-providers-microsoft-azure>`_ ``microsoft.azure``
 `apache-airflow-providers-mongo <https://airflow.apache.org/docs/apache-airflow-providers-mongo>`_ ``mongo``
+`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
 `apache-airflow-providers-salesforce <https://airflow.apache.org/docs/apache-airflow-providers-salesforce>`_ ``salesforce``
 `apache-airflow-providers-ssh <https://airflow.apache.org/docs/apache-airflow-providers-ssh>`_ ``ssh``
 ====================================================================================================================== ===================
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-amazon/8.4.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-amazon/8.5.0/changelog.html>`_.
@@ -1,5 +1,5 @@
-airflow/providers/amazon/__init__.py,sha256=C58S6nS30Yn4Z04vEMZLHK-zKVn9GSuPZYdRQRh_NOc,1575
-airflow/providers/amazon/get_provider_info.py,sha256=oSlIPHBZgMDh2s3nVx8LtDW8Xj6zsH1SqE6ZXBbDNn0,41325
+airflow/providers/amazon/__init__.py,sha256=60eBmUeqz6lH1y1OFUDLhImPGgbEoFVY11WaSwGDKXA,1575
+airflow/providers/amazon/get_provider_info.py,sha256=-5W_isthQiQmmNCP9Wsn-tO_yigmQ-dDKWpEb9DTamY,41529
 airflow/providers/amazon/aws/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/amazon/aws/exceptions.py,sha256=UVoxpfQEdWI1319h0U78Z_r5wRFQL6DN14hJw3G1Rgo,1731
 airflow/providers/amazon/aws/hooks/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
@@ -20,7 +20,7 @@ airflow/providers/amazon/aws/hooks/ecs.py,sha256=3o2UX7XkZwcRhDCxLwqM-1hL1zDNSaP
 airflow/providers/amazon/aws/hooks/eks.py,sha256=7J1TunLNdObG0BzSjmqbEF2FWkPWoL2CKfbPskFZf5s,23958
 airflow/providers/amazon/aws/hooks/elasticache_replication_group.py,sha256=D2xrqRkHTowTEGoxPjV5UnKw9-hSFVIR0rZoWWKPGPI,12119
 airflow/providers/amazon/aws/hooks/emr.py,sha256=l9mYczK7E-lWnBozBvU-2HCQ5Q23-id6o1ikiWf8JII,20679
-airflow/providers/amazon/aws/hooks/eventbridge.py,sha256=ZZjzmEMmLUR6qHaWsYC6YOXbyFLv6xCmH-TZwyjYkPM,1099
+airflow/providers/amazon/aws/hooks/eventbridge.py,sha256=vWHZHnVwLsYmS64zSryj6VuWvjMsyiMQIIbJMFVmdSo,3389
 airflow/providers/amazon/aws/hooks/glacier.py,sha256=z4g3PCktIDjoq8O0SV_HDheGb0BNR4yy6bOlRCBH7_k,3452
 airflow/providers/amazon/aws/hooks/glue.py,sha256=KTEw7Y6RPvvP6xmPHYGfe3KMmrGtaMrPbSdHk_WWx7M,16313
 airflow/providers/amazon/aws/hooks/glue_catalog.py,sha256=4TBL4JSfcU4VILMZWL0mby-W2wha1s9Lo3ZDOeqVjkU,7616
@@ -33,7 +33,7 @@ airflow/providers/amazon/aws/hooks/rds.py,sha256=vB8GADMEyfIsrvNZB_B2aDZZB0LSFFB
 airflow/providers/amazon/aws/hooks/redshift_cluster.py,sha256=2-DjciI4d6u8PsOODxgkB3LfKwWBgt2MUQuQcETgHig,13169
 airflow/providers/amazon/aws/hooks/redshift_data.py,sha256=jro_OQravSSoMfsrnIv7chkn-WKD6Sl4CO5nswUL250,8342
 airflow/providers/amazon/aws/hooks/redshift_sql.py,sha256=II8iY4s1_uEEZmztkYipi5M5kfHmtYcurnJk9mC03oo,7160
-airflow/providers/amazon/aws/hooks/s3.py,sha256=lZBq5HTHGJA_cjbvhG6pzkLi4zdnLsfLNIGyxjNokDE,58255
+airflow/providers/amazon/aws/hooks/s3.py,sha256=BqxL4EsR6A0BszuNxzV9KCbqPzpul7tDPoctv7P2oxM,58277
 airflow/providers/amazon/aws/hooks/sagemaker.py,sha256=_nYt3jL9JyRPI4Mu4Yu5xvZPZbkNNTvkuCxkVYqMZcE,56469
 airflow/providers/amazon/aws/hooks/secrets_manager.py,sha256=BTsEHm3GTcoNnE8jBJO403q6wuGkXZFwglQ7l-P9b5M,2667
 airflow/providers/amazon/aws/hooks/ses.py,sha256=M8rSXic50lmrE4D9a34GBLTavShg4zsFgDznlhUZiNY,4146
@@ -56,7 +56,7 @@ airflow/providers/amazon/aws/notifications/chime.py,sha256=4Rg1A1GJYyMoVOUVwTN9w
 airflow/providers/amazon/aws/operators/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/amazon/aws/operators/appflow.py,sha256=r65gqA7l6W1AFX71fpWO-okwq1RadtBXi-0AjVYytg0,19662
 airflow/providers/amazon/aws/operators/athena.py,sha256=HGBIXYf2XnORLsRRsdEXRhMad9f12j3jFco-_vzYDRA,7164
-airflow/providers/amazon/aws/operators/batch.py,sha256=ZrFvFK0L6NEJyWe_mfzuWzUVMK_tPbvAXXv6flF3gO8,20436
+airflow/providers/amazon/aws/operators/batch.py,sha256=3zaieJWSZdzI7hlyygKbN9-7Fsh1mrv6AJkSUXrnNHc,20423
 airflow/providers/amazon/aws/operators/cloud_formation.py,sha256=lvK-2LQwNSKgkyWzcEyMurvaEXsdZl4uj49nyi9GLqs,3633
 airflow/providers/amazon/aws/operators/datasync.py,sha256=IEWIwuWZ5iAVY20Io4l5hV-nE2Ak9M2sxTY-7dCfJus,18421
 airflow/providers/amazon/aws/operators/dms.py,sha256=6yRpzvsJ6SS_IP1m5va7mEdkemp-7I0zIZ6u_Ve2mrs,10720
@@ -64,17 +64,17 @@ airflow/providers/amazon/aws/operators/ec2.py,sha256=a6WLow1jf9DgfO_uhh4SaNIZfXz
 airflow/providers/amazon/aws/operators/ecs.py,sha256=18ut_AmWUPHrjk3FDGIVCQ1P5CqCnfPqUXlX_9wDCf4,33315
 airflow/providers/amazon/aws/operators/eks.py,sha256=PktFILJULJ7zWetKge0pzl7nzBTArVYxg8JG2_tgoXA,50133
 airflow/providers/amazon/aws/operators/emr.py,sha256=tNS-s5lMJ735_gOi-7Sf-mrnr7gLsnjCV2JwFCiaqZQ,74988
-airflow/providers/amazon/aws/operators/eventbridge.py,sha256=_JAFe9nuFsroTQ_Xtpu-ZgfAAhzQMyWrvz4A4VLytsI,3035
+airflow/providers/amazon/aws/operators/eventbridge.py,sha256=Zn1ghxlyHhKChNMUkx5fpAGcFBScqKdZroQBz63EQic,5941
 airflow/providers/amazon/aws/operators/glacier.py,sha256=016tRPPtrvl16ixqCJRPOchp3ZpXHAj1AW-brRK9dTQ,3707
 airflow/providers/amazon/aws/operators/glue.py,sha256=2Vsv37IC-032UslGSIzxhAQIIR3mA0XzhCbhqW0er_0,9663
 airflow/providers/amazon/aws/operators/glue_crawler.py,sha256=Xui7G1DSst2Vx1LDOUfZ7trZoDZYKkaPRoVN1fztQTs,4348
 airflow/providers/amazon/aws/operators/lambda_function.py,sha256=uqV-1kSTflMo_UPBhP7oNI22wEJ4DaNixqH2Y2T1zgk,7749
 airflow/providers/amazon/aws/operators/quicksight.py,sha256=GFbMDKNkAr-87fIxJc3DMoxl30RvqS18vMhnn1nBnHY,3968
-airflow/providers/amazon/aws/operators/rds.py,sha256=GqZwQDzuiaoHNiJCT6f8jCF8SDMjladP-FVF0w3ltTM,38326
+airflow/providers/amazon/aws/operators/rds.py,sha256=RtEVzwOu3KI4IMpfKp1S3zEUnA-kmEHgrqJPhmsa24Q,38629
 airflow/providers/amazon/aws/operators/redshift_cluster.py,sha256=-F2Z0t9LFKHjF5D-RTeaJW3tCxmOqpTs8Se_QoyuC5s,32304
 airflow/providers/amazon/aws/operators/redshift_data.py,sha256=tZzGiF3Oe9J2MCtZiu3VBtFDIHlrBOuPgWTtJE1BpRE,5982
 airflow/providers/amazon/aws/operators/s3.py,sha256=AhQzivpiTRbBfy_-1vj8FfI1YXB-CGTASlg7aT0fgIM,30235
-airflow/providers/amazon/aws/operators/sagemaker.py,sha256=HvdfXUiIrHamjyx4IFLEo9t2sEaIVeeSJI0Bt-b39Ig,61246
+airflow/providers/amazon/aws/operators/sagemaker.py,sha256=SexTzcz2zTo2fkZwBz66NyJk3sak2HOJQ6w54vxegwk,66643
 airflow/providers/amazon/aws/operators/sns.py,sha256=vomOmDKeX177_MDOgulMyQVr4FqMHNhPo3-HnrfO370,2960
 airflow/providers/amazon/aws/operators/sqs.py,sha256=gi7qbdNkPTaDo63jfjHFuV4Rsegbg1_FGVd4-WGQMLU,3565
 airflow/providers/amazon/aws/operators/step_function.py,sha256=Q1z050K-Pzw4a1PglhMT-wxiLSIFNLaCusozA00vU4w,6411
@@ -101,7 +101,7 @@ airflow/providers/amazon/aws/sensors/rds.py,sha256=4mLAFLjY8yI3bExVdGATXIIiiuwZy
 airflow/providers/amazon/aws/sensors/redshift_cluster.py,sha256=B2f4y8aHNtdc6vew-XBFNWDx-YLnBRGXZkWEHZQQoRs,2623
 airflow/providers/amazon/aws/sensors/s3.py,sha256=8-H5mCzGXc1YgFRjDYNg0U37beVVqyHCWB3IiCQCVIQ,15066
 airflow/providers/amazon/aws/sensors/sagemaker.py,sha256=tECTTigli8-C8FYnPcm5U6z3aR5AHXZGx9iW9VXhGKc,12794
-airflow/providers/amazon/aws/sensors/sqs.py,sha256=XYZF64l7HgFR1Zl9TZSUVNbTuaMh6yo09T0gzBfiLF8,9543
+airflow/providers/amazon/aws/sensors/sqs.py,sha256=PD0aWAPMUVHbbwH2IJziGPwWpouUYMENPYZK8sTrE9A,9793
 airflow/providers/amazon/aws/sensors/step_function.py,sha256=P2BItKtNrAeL4wnS3RGp-Ep3crUMOfE7O9SH1vvaRCQ,3333
 airflow/providers/amazon/aws/transfers/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/amazon/aws/transfers/azure_blob_to_s3.py,sha256=Q2T0VlFOb9ZpC5b7QgvX_hWafPuUkjEnMJq8x7gcOy0,7019
@@ -109,7 +109,7 @@ airflow/providers/amazon/aws/transfers/base.py,sha256=QGrfp6mZwFO691fN1Ig0cVbJXM
 airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py,sha256=T9AofurdwN9vRXTHQXA7OEkIlU5Ym5pw9553bqXU3XQ,7755
 airflow/providers/amazon/aws/transfers/exasol_to_s3.py,sha256=Hwh684UFRIfhYxHlapNuAiIse770ZmeB8Le_KMd6o04,4410
 airflow/providers/amazon/aws/transfers/ftp_to_s3.py,sha256=ZpcB8idMpcPMYcHXR6BrtgTCZo7MUI3b9HzOBphvkhM,6368
-airflow/providers/amazon/aws/transfers/gcs_to_s3.py,sha256=hd33lzoyGBedzaSfx4KbwVjJtL2G2SW65lJ-RU0GYqI,8544
+airflow/providers/amazon/aws/transfers/gcs_to_s3.py,sha256=mn-DWZBnDaKgxF1bjvIOk6x0Gx_yl0kc5360xUXdSm4,9841
 airflow/providers/amazon/aws/transfers/glacier_to_gcs.py,sha256=J4BZ5kgsl3D6kChbdfjML1DP5cC-oXQyvJNwJKnKmzc,4702
 airflow/providers/amazon/aws/transfers/google_api_to_s3.py,sha256=yYnJEqmHLG7ZSaPYcxFhQ3X5Clsd3yCQkrNO9kpFJAY,9042
 airflow/providers/amazon/aws/transfers/hive_to_dynamodb.py,sha256=uflqwFii7EYapwHyCiueFy34YrWLxmYFA8kHrggqhSc,4173
@@ -138,6 +138,7 @@ airflow/providers/amazon/aws/triggers/rds.py,sha256=oPyOnDc1vkRERtG2KHuq4fmhIWZ-
 airflow/providers/amazon/aws/triggers/redshift_cluster.py,sha256=hkjXixGT8HvejumHvJYQH2loZ4RTsJoLHWq1KcxLiqo,10704
 airflow/providers/amazon/aws/triggers/s3.py,sha256=FSNAFSNUwNsSFWGkQbNkfvRXgYZe9g-Qr6w5TYxM2m8,9085
 airflow/providers/amazon/aws/triggers/sagemaker.py,sha256=uXob6OSboFsrHSxqTe74LxdzMgMF9r5kbTiecS2eCvo,7906
+airflow/providers/amazon/aws/triggers/sqs.py,sha256=L7FvTU_EnCgKVKJUKHCwzORTnSeC4MN7yPLYrk6r5-Y,7739
 airflow/providers/amazon/aws/triggers/step_function.py,sha256=4iK7FlWd2n-PIYO1b-cMZh-gjtXQgS91FJi-dH3LL_s,2481
 airflow/providers/amazon/aws/utils/__init__.py,sha256=rGh89AYKSlXNBkmHfmDr8dYJPQ-504_emAXBHIQH-wY,2968
 airflow/providers/amazon/aws/utils/connection_wrapper.py,sha256=UEf1bH6LYdfK3hbP0kPBHWreuJl8FlDA7HMAUc8fuHY,20649
@@ -146,6 +147,7 @@ airflow/providers/amazon/aws/utils/emailer.py,sha256=fHzzErXFs74KP6VXJrCc-1T0K1s
 airflow/providers/amazon/aws/utils/rds.py,sha256=Qx5NEHGdzdmqNOtmAnSk1xErt9v6f-25a5Huos9bvPY,963
 airflow/providers/amazon/aws/utils/redshift.py,sha256=NSSw6kd-jMtXR9iSxNFet_9UUqEN1ZvNzwdA9Abhi9Y,1897
 airflow/providers/amazon/aws/utils/sagemaker.py,sha256=893W8DBPhsyPINbFph9MKKP4O_zwptse0oUWm3XtGDE,1040
+airflow/providers/amazon/aws/utils/sqs.py,sha256=nhO1NXNNOorwsP73GfJFl_EbrhKO6EqR8-WKWTZb-Bo,3271
 airflow/providers/amazon/aws/utils/tags.py,sha256=-WPb4MpzZxV4MHS6OD09EronbR_jlfuVQeEqu4cVnj0,1762
 airflow/providers/amazon/aws/utils/task_log_fetcher.py,sha256=-6yaD9kHpPfIMakkD0SD8CoZf_TBGGnMQYBuKTpcqhI,4491
 airflow/providers/amazon/aws/utils/waiter.py,sha256=FO1WupdK7Z9AonrC8w_XcRpQE7A-o4VlgaqQxV65dbk,3509
@@ -165,10 +167,10 @@ airflow/providers/amazon/aws/waiters/glue.json,sha256=xUPDsOrZ7m2O_0rSj_Dweq-EAV
 airflow/providers/amazon/aws/waiters/redshift.json,sha256=jOBotCgbkko1b_CHcGEbhhRvusgt0YSzVuFiZrqVP30,1742
 airflow/providers/amazon/aws/waiters/sagemaker.json,sha256=JPHuQtUFZ1B7EMLfVmCRevNZ9jgpB71LM0dva8ZEO9A,5254
 airflow/providers/amazon/aws/waiters/stepfunctions.json,sha256=aBaAZaGv8ZZGdN-2gvYEbq3fL_WHI_7s6SSDL-nWS1A,1034
-apache_airflow_providers_amazon-8.4.0.dist-info/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
-apache_airflow_providers_amazon-8.4.0.dist-info/METADATA,sha256=GOAoCoAvGebDyW4ismsmls5xZuePPqeZ5ifdd1GDxMM,9561
-apache_airflow_providers_amazon-8.4.0.dist-info/NOTICE,sha256=m-6s2XynUxVSUIxO4rVablAZCvFq-wmLrqV91DotRBw,240
-apache_airflow_providers_amazon-8.4.0.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
-apache_airflow_providers_amazon-8.4.0.dist-info/entry_points.txt,sha256=HNciwBqNoIpFPDUUu9_tL6HyozCAxlpCifH810uQTOQ,103
-apache_airflow_providers_amazon-8.4.0.dist-info/top_level.txt,sha256=OeMVH5md7fr2QQWpnZoOWWxWO-0WH1IP70lpTVwopPg,8
-apache_airflow_providers_amazon-8.4.0.dist-info/RECORD,,
+apache_airflow_providers_amazon-8.5.0rc1.dist-info/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
+apache_airflow_providers_amazon-8.5.0rc1.dist-info/METADATA,sha256=RFf-KTJsd-sTzZHAydh0O9zZLV3cZYPF-BKyoM5wY8Y,9818
+apache_airflow_providers_amazon-8.5.0rc1.dist-info/NOTICE,sha256=m-6s2XynUxVSUIxO4rVablAZCvFq-wmLrqV91DotRBw,240
+apache_airflow_providers_amazon-8.5.0rc1.dist-info/WHEEL,sha256=AtBG6SXL3KF_v0NxLf0ehyVOh0cold-JbJYXNGorC6Q,92
+apache_airflow_providers_amazon-8.5.0rc1.dist-info/entry_points.txt,sha256=HNciwBqNoIpFPDUUu9_tL6HyozCAxlpCifH810uQTOQ,103
+apache_airflow_providers_amazon-8.5.0rc1.dist-info/top_level.txt,sha256=OeMVH5md7fr2QQWpnZoOWWxWO-0WH1IP70lpTVwopPg,8
+apache_airflow_providers_amazon-8.5.0rc1.dist-info/RECORD,,