apache-airflow-providers-amazon 9.4.0rc1__py3-none-any.whl → 9.5.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56):
  1. airflow/providers/amazon/__init__.py +1 -1
  2. airflow/providers/amazon/aws/auth_manager/avp/entities.py +3 -1
  3. airflow/providers/amazon/aws/auth_manager/avp/facade.py +1 -1
  4. airflow/providers/amazon/aws/auth_manager/aws_auth_manager.py +69 -97
  5. airflow/providers/amazon/aws/auth_manager/router/login.py +9 -4
  6. airflow/providers/amazon/aws/auth_manager/user.py +7 -4
  7. airflow/providers/amazon/aws/hooks/appflow.py +5 -15
  8. airflow/providers/amazon/aws/hooks/base_aws.py +34 -1
  9. airflow/providers/amazon/aws/hooks/ec2.py +1 -1
  10. airflow/providers/amazon/aws/hooks/eks.py +3 -6
  11. airflow/providers/amazon/aws/hooks/glue.py +6 -2
  12. airflow/providers/amazon/aws/hooks/logs.py +2 -2
  13. airflow/providers/amazon/aws/hooks/mwaa.py +79 -15
  14. airflow/providers/amazon/aws/hooks/redshift_cluster.py +1 -1
  15. airflow/providers/amazon/aws/hooks/redshift_data.py +2 -2
  16. airflow/providers/amazon/aws/hooks/s3.py +3 -1
  17. airflow/providers/amazon/aws/hooks/sagemaker.py +1 -1
  18. airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py +188 -0
  19. airflow/providers/amazon/aws/links/base_aws.py +8 -1
  20. airflow/providers/amazon/aws/links/sagemaker_unified_studio.py +27 -0
  21. airflow/providers/amazon/aws/log/s3_task_handler.py +22 -7
  22. airflow/providers/amazon/aws/notifications/chime.py +1 -2
  23. airflow/providers/amazon/aws/notifications/sns.py +1 -1
  24. airflow/providers/amazon/aws/notifications/sqs.py +1 -1
  25. airflow/providers/amazon/aws/operators/ec2.py +91 -83
  26. airflow/providers/amazon/aws/operators/mwaa.py +73 -2
  27. airflow/providers/amazon/aws/operators/s3.py +147 -157
  28. airflow/providers/amazon/aws/operators/sagemaker.py +1 -2
  29. airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py +155 -0
  30. airflow/providers/amazon/aws/sensors/ec2.py +5 -12
  31. airflow/providers/amazon/aws/sensors/emr.py +1 -1
  32. airflow/providers/amazon/aws/sensors/mwaa.py +160 -0
  33. airflow/providers/amazon/aws/sensors/rds.py +10 -5
  34. airflow/providers/amazon/aws/sensors/s3.py +31 -42
  35. airflow/providers/amazon/aws/sensors/sagemaker_unified_studio.py +73 -0
  36. airflow/providers/amazon/aws/transfers/redshift_to_s3.py +19 -4
  37. airflow/providers/amazon/aws/transfers/s3_to_redshift.py +19 -3
  38. airflow/providers/amazon/aws/triggers/README.md +4 -4
  39. airflow/providers/amazon/aws/triggers/base.py +11 -2
  40. airflow/providers/amazon/aws/triggers/ecs.py +6 -2
  41. airflow/providers/amazon/aws/triggers/eks.py +2 -2
  42. airflow/providers/amazon/aws/triggers/glue.py +1 -1
  43. airflow/providers/amazon/aws/triggers/mwaa.py +128 -0
  44. airflow/providers/amazon/aws/triggers/s3.py +31 -6
  45. airflow/providers/amazon/aws/triggers/sagemaker.py +2 -2
  46. airflow/providers/amazon/aws/triggers/sagemaker_unified_studio.py +66 -0
  47. airflow/providers/amazon/aws/triggers/sqs.py +11 -3
  48. airflow/providers/amazon/aws/{auth_manager/security_manager/__init__.py → utils/sagemaker_unified_studio.py} +12 -0
  49. airflow/providers/amazon/aws/utils/waiter_with_logging.py +4 -3
  50. airflow/providers/amazon/aws/waiters/mwaa.json +36 -0
  51. airflow/providers/amazon/get_provider_info.py +45 -4
  52. {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0rc2.dist-info}/METADATA +38 -31
  53. {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0rc2.dist-info}/RECORD +55 -48
  54. {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0rc2.dist-info}/WHEEL +1 -1
  55. airflow/providers/amazon/aws/auth_manager/security_manager/aws_security_manager_override.py +0 -40
  56. {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0rc2.dist-info}/entry_points.txt +0 -0
@@ -44,7 +44,6 @@ from airflow.providers.amazon.aws.utils import trim_none_values, validate_execut
44
44
  from airflow.providers.amazon.aws.utils.sagemaker import ApprovalStatus
45
45
  from airflow.providers.amazon.aws.utils.tags import format_tags
46
46
  from airflow.utils.helpers import prune_dict
47
- from airflow.utils.json import AirflowJsonEncoder
48
47
 
49
48
  if TYPE_CHECKING:
50
49
  from airflow.providers.common.compat.openlineage.facet import Dataset
@@ -56,7 +55,7 @@ CHECK_INTERVAL_SECOND: int = 30
56
55
 
57
56
 
58
57
  def serialize(result: dict) -> dict:
59
- return json.loads(json.dumps(result, cls=AirflowJsonEncoder))
58
+ return json.loads(json.dumps(result, default=repr))
60
59
 
61
60
 
62
61
  class SageMakerBaseOperator(BaseOperator):
@@ -0,0 +1,155 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ """This module contains the Amazon SageMaker Unified Studio Notebook operator."""
19
+
20
+ from __future__ import annotations
21
+
22
+ from functools import cached_property
23
+ from typing import TYPE_CHECKING
24
+
25
+ from airflow.configuration import conf
26
+ from airflow.exceptions import AirflowException
27
+ from airflow.models import BaseOperator
28
+ from airflow.providers.amazon.aws.hooks.sagemaker_unified_studio import (
29
+ SageMakerNotebookHook,
30
+ )
31
+ from airflow.providers.amazon.aws.links.sagemaker_unified_studio import (
32
+ SageMakerUnifiedStudioLink,
33
+ )
34
+ from airflow.providers.amazon.aws.triggers.sagemaker_unified_studio import (
35
+ SageMakerNotebookJobTrigger,
36
+ )
37
+
38
+ if TYPE_CHECKING:
39
+ from airflow.utils.context import Context
40
+
41
+
42
+ class SageMakerNotebookOperator(BaseOperator):
43
+ """
44
+ Provides Artifact execution functionality for Sagemaker Unified Studio Workflows.
45
+
46
+ Examples:
47
+ .. code-block:: python
48
+
49
+ from airflow.providers.amazon.aws.operators.sagemaker_unified_studio import SageMakerNotebookOperator
50
+
51
+ notebook_operator = SageMakerNotebookOperator(
52
+ task_id="notebook_task",
53
+ input_config={"input_path": "path/to/notebook.ipynb", "input_params": ""},
54
+ output_config={"output_format": "ipynb"},
55
+ wait_for_completion=True,
56
+ waiter_delay=10,
57
+ waiter_max_attempts=1440,
58
+ )
59
+
60
+ :param task_id: A unique, meaningful id for the task.
61
+ :param input_config: Configuration for the input file. Input path should be specified as a relative path.
62
+ The provided relative path will be automatically resolved to an absolute path within
63
+ the context of the user's home directory in the IDE. Input params should be a dict.
64
+ Example: {'input_path': 'folder/input/notebook.ipynb', 'input_params':{'key': 'value'}}
65
+ :param output_config: Configuration for the output format. It should include an output_format parameter to control
66
+ the format of the notebook execution output.
67
+ Example: {"output_formats": ["NOTEBOOK"]}
68
+ :param compute: compute configuration to use for the artifact execution. This is a required attribute
69
+ if the execution is on a remote compute.
70
+ Example: { "InstanceType": "ml.m5.large", "VolumeSizeInGB": 30, "VolumeKmsKeyId": "", "ImageUri": "string", "ContainerEntrypoint": [ "string" ]}
71
+ :param termination_condition: conditions to match to terminate the remote execution.
72
+ Example: { "MaxRuntimeInSeconds": 3600 }
73
+ :param tags: tags to be associated with the remote execution runs.
74
+ Example: { "md_analytics": "logs" }
75
+ :param wait_for_completion: Indicates whether to wait for the notebook execution to complete. If True, wait for completion; if False, don't wait.
76
+ :param waiter_delay: Interval in seconds to check the notebook execution status.
77
+ :param waiter_max_attempts: Number of attempts to wait before returning FAILED.
78
+ :param deferrable: If True, the operator will wait asynchronously for the job to complete.
79
+ This implies waiting for completion. This mode requires aiobotocore module to be installed.
80
+ (default: False)
81
+
82
+ .. seealso::
83
+ For more information on how to use this operator, take a look at the guide:
84
+ :ref:`howto/operator:SageMakerNotebookOperator`
85
+ """
86
+
87
+ operator_extra_links = (SageMakerUnifiedStudioLink(),)
88
+
89
+ def __init__(
90
+ self,
91
+ task_id: str,
92
+ input_config: dict,
93
+ output_config: dict | None = None,
94
+ compute: dict | None = None,
95
+ termination_condition: dict | None = None,
96
+ tags: dict | None = None,
97
+ wait_for_completion: bool = True,
98
+ waiter_delay: int = 10,
99
+ waiter_max_attempts: int = 1440,
100
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
101
+ **kwargs,
102
+ ):
103
+ super().__init__(task_id=task_id, **kwargs)
104
+ self.execution_name = task_id
105
+ self.input_config = input_config
106
+ self.output_config = output_config or {"output_formats": ["NOTEBOOK"]}
107
+ self.compute = compute or {}
108
+ self.termination_condition = termination_condition or {}
109
+ self.tags = tags or {}
110
+ self.wait_for_completion = wait_for_completion
111
+ self.waiter_delay = waiter_delay
112
+ self.waiter_max_attempts = waiter_max_attempts
113
+ self.deferrable = deferrable
114
+ self.input_kwargs = kwargs
115
+
116
+ @cached_property
117
+ def notebook_execution_hook(self):
118
+ if not self.input_config:
119
+ raise AirflowException("input_config is required")
120
+
121
+ if "input_path" not in self.input_config:
122
+ raise AirflowException("input_path is a required field in the input_config")
123
+
124
+ return SageMakerNotebookHook(
125
+ input_config=self.input_config,
126
+ output_config=self.output_config,
127
+ execution_name=self.execution_name,
128
+ compute=self.compute,
129
+ termination_condition=self.termination_condition,
130
+ tags=self.tags,
131
+ waiter_delay=self.waiter_delay,
132
+ waiter_max_attempts=self.waiter_max_attempts,
133
+ )
134
+
135
+ def execute(self, context: Context):
136
+ notebook_execution = self.notebook_execution_hook.start_notebook_execution()
137
+ execution_id = notebook_execution["execution_id"]
138
+
139
+ if self.deferrable:
140
+ self.defer(
141
+ trigger=SageMakerNotebookJobTrigger(
142
+ execution_id=execution_id,
143
+ execution_name=self.execution_name,
144
+ waiter_delay=self.waiter_delay,
145
+ waiter_max_attempts=self.waiter_max_attempts,
146
+ ),
147
+ method_name="execute_complete",
148
+ )
149
+ elif self.wait_for_completion:
150
+ response = self.notebook_execution_hook.wait_for_execution_completion(execution_id, context)
151
+ status = response["Status"]
152
+ log_info_message = (
153
+ f"Notebook Execution: {self.execution_name} Status: {status}. Run Id: {execution_id}"
154
+ )
155
+ self.log.info(log_info_message)
@@ -18,21 +18,21 @@
18
18
  from __future__ import annotations
19
19
 
20
20
  from collections.abc import Sequence
21
- from functools import cached_property
22
21
  from typing import TYPE_CHECKING, Any
23
22
 
24
23
  from airflow.configuration import conf
25
24
  from airflow.exceptions import AirflowException
26
25
  from airflow.providers.amazon.aws.hooks.ec2 import EC2Hook
26
+ from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
27
27
  from airflow.providers.amazon.aws.triggers.ec2 import EC2StateSensorTrigger
28
28
  from airflow.providers.amazon.aws.utils import validate_execute_complete_event
29
- from airflow.sensors.base import BaseSensorOperator
29
+ from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
30
30
 
31
31
  if TYPE_CHECKING:
32
32
  from airflow.utils.context import Context
33
33
 
34
34
 
35
- class EC2InstanceStateSensor(BaseSensorOperator):
35
+ class EC2InstanceStateSensor(AwsBaseSensor[EC2Hook]):
36
36
  """
37
37
  Poll the state of the AWS EC2 instance until the instance reaches the target state.
38
38
 
@@ -46,7 +46,8 @@ class EC2InstanceStateSensor(BaseSensorOperator):
46
46
  :param deferrable: if True, the sensor will run in deferrable mode
47
47
  """
48
48
 
49
- template_fields: Sequence[str] = ("target_state", "instance_id", "region_name")
49
+ aws_hook_class = EC2Hook
50
+ template_fields: Sequence[str] = aws_template_fields("target_state", "instance_id", "region_name")
50
51
  ui_color = "#cc8811"
51
52
  ui_fgcolor = "#ffffff"
52
53
  valid_states = ["running", "stopped", "terminated"]
@@ -56,8 +57,6 @@ class EC2InstanceStateSensor(BaseSensorOperator):
56
57
  *,
57
58
  target_state: str,
58
59
  instance_id: str,
59
- aws_conn_id: str | None = "aws_default",
60
- region_name: str | None = None,
61
60
  deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
62
61
  **kwargs,
63
62
  ):
@@ -66,8 +65,6 @@ class EC2InstanceStateSensor(BaseSensorOperator):
66
65
  super().__init__(**kwargs)
67
66
  self.target_state = target_state
68
67
  self.instance_id = instance_id
69
- self.aws_conn_id = aws_conn_id
70
- self.region_name = region_name
71
68
  self.deferrable = deferrable
72
69
 
73
70
  def execute(self, context: Context) -> Any:
@@ -85,10 +82,6 @@ class EC2InstanceStateSensor(BaseSensorOperator):
85
82
  else:
86
83
  super().execute(context=context)
87
84
 
88
- @cached_property
89
- def hook(self):
90
- return EC2Hook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)
91
-
92
85
  def poke(self, context: Context):
93
86
  instance_state = self.hook.get_instance_state(instance_id=self.instance_id)
94
87
  self.log.info("instance state: %s", instance_state)
@@ -311,7 +311,7 @@ class EmrContainerSensor(BaseSensorOperator):
311
311
  )
312
312
 
313
313
  if state in self.FAILURE_STATES:
314
- raise AirflowException("EMR Containers sensor failed")
314
+ raise AirflowException(f"EMR Containers sensor failed due to state: {state}")
315
315
 
316
316
  if state in self.INTERMEDIATE_STATES:
317
317
  return False
@@ -0,0 +1,160 @@
1
+ #
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing,
13
+ # software distributed under the License is distributed on an
14
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ # KIND, either express or implied. See the License for the
16
+ # specific language governing permissions and limitations
17
+ # under the License.
18
+ from __future__ import annotations
19
+
20
+ from collections.abc import Collection, Sequence
21
+ from typing import TYPE_CHECKING, Any
22
+
23
+ from airflow.configuration import conf
24
+ from airflow.exceptions import AirflowException
25
+ from airflow.providers.amazon.aws.hooks.mwaa import MwaaHook
26
+ from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
27
+ from airflow.providers.amazon.aws.triggers.mwaa import MwaaDagRunCompletedTrigger
28
+ from airflow.providers.amazon.aws.utils import validate_execute_complete_event
29
+ from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
30
+ from airflow.utils.state import DagRunState
31
+
32
+ if TYPE_CHECKING:
33
+ from airflow.utils.context import Context
34
+
35
+
36
+ class MwaaDagRunSensor(AwsBaseSensor[MwaaHook]):
37
+ """
38
+ Waits for a DAG Run in an MWAA Environment to complete.
39
+
40
+ If the DAG Run fails, an AirflowException is thrown.
41
+
42
+ .. seealso::
43
+ For more information on how to use this sensor, take a look at the guide:
44
+ :ref:`howto/sensor:MwaaDagRunSensor`
45
+
46
+ :param external_env_name: The external MWAA environment name that contains the DAG Run you want to wait for
47
+ (templated)
48
+ :param external_dag_id: The DAG ID in the external MWAA environment that contains the DAG Run you want to wait for
49
+ (templated)
50
+ :param external_dag_run_id: The DAG Run ID in the external MWAA environment that you want to wait for (templated)
51
+ :param success_states: Collection of DAG Run states that would make this task marked as successful, default is
52
+ ``{airflow.utils.state.DagRunState.SUCCESS}`` (templated)
53
+ :param failure_states: Collection of DAG Run states that would make this task marked as failed and raise an
54
+ AirflowException, default is ``{airflow.utils.state.DagRunState.FAILED}`` (templated)
55
+ :param deferrable: If True, the sensor will operate in deferrable mode. This mode requires aiobotocore
56
+ module to be installed.
57
+ (default: False, but can be overridden in config file by setting default_deferrable to True)
58
+ :param poke_interval: Polling period in seconds to check for the status of the job. (default: 60)
59
+ :param max_retries: Number of times before returning the current state. (default: 720)
60
+ :param aws_conn_id: The Airflow connection used for AWS credentials.
61
+ If this is ``None`` or empty then the default boto3 behaviour is used. If
62
+ running Airflow in a distributed manner and aws_conn_id is None or
63
+ empty, then default boto3 configuration would be used (and must be
64
+ maintained on each worker node).
65
+ :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
66
+ :param verify: Whether or not to verify SSL certificates. See:
67
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
68
+ :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
69
+ https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
70
+ """
71
+
72
+ aws_hook_class = MwaaHook
73
+ template_fields: Sequence[str] = aws_template_fields(
74
+ "external_env_name",
75
+ "external_dag_id",
76
+ "external_dag_run_id",
77
+ "success_states",
78
+ "failure_states",
79
+ "deferrable",
80
+ "max_retries",
81
+ "poke_interval",
82
+ )
83
+
84
+ def __init__(
85
+ self,
86
+ *,
87
+ external_env_name: str,
88
+ external_dag_id: str,
89
+ external_dag_run_id: str,
90
+ success_states: Collection[str] | None = None,
91
+ failure_states: Collection[str] | None = None,
92
+ deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
93
+ poke_interval: int = 60,
94
+ max_retries: int = 720,
95
+ **kwargs,
96
+ ):
97
+ super().__init__(**kwargs)
98
+
99
+ self.success_states = set(success_states) if success_states else {DagRunState.SUCCESS.value}
100
+ self.failure_states = set(failure_states) if failure_states else {DagRunState.FAILED.value}
101
+
102
+ if len(self.success_states & self.failure_states):
103
+ raise ValueError("success_states and failure_states must not have any values in common")
104
+
105
+ self.external_env_name = external_env_name
106
+ self.external_dag_id = external_dag_id
107
+ self.external_dag_run_id = external_dag_run_id
108
+ self.deferrable = deferrable
109
+ self.poke_interval = poke_interval
110
+ self.max_retries = max_retries
111
+
112
+ def poke(self, context: Context) -> bool:
113
+ self.log.info(
114
+ "Poking for DAG run %s of DAG %s in MWAA environment %s",
115
+ self.external_dag_run_id,
116
+ self.external_dag_id,
117
+ self.external_env_name,
118
+ )
119
+ response = self.hook.invoke_rest_api(
120
+ env_name=self.external_env_name,
121
+ path=f"/dags/{self.external_dag_id}/dagRuns/{self.external_dag_run_id}",
122
+ method="GET",
123
+ )
124
+
125
+ # If RestApiStatusCode == 200, the RestApiResponse must have the "state" key, otherwise something terrible has
126
+ # happened in the API and KeyError would be raised
127
+ # If RestApiStatusCode >= 300, a botocore exception would've already been raised during the
128
+ # self.hook.invoke_rest_api call
129
+ # The scope of this sensor is going to only be raising AirflowException due to failure of the DAGRun
130
+
131
+ state = response["RestApiResponse"]["state"]
132
+
133
+ if state in self.failure_states:
134
+ raise AirflowException(
135
+ f"The DAG run {self.external_dag_run_id} of DAG {self.external_dag_id} in MWAA environment {self.external_env_name} "
136
+ f"failed with state: {state}"
137
+ )
138
+
139
+ return state in self.success_states
140
+
141
+ def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
142
+ validate_execute_complete_event(event)
143
+
144
+ def execute(self, context: Context):
145
+ if self.deferrable:
146
+ self.defer(
147
+ trigger=MwaaDagRunCompletedTrigger(
148
+ external_env_name=self.external_env_name,
149
+ external_dag_id=self.external_dag_id,
150
+ external_dag_run_id=self.external_dag_run_id,
151
+ success_states=self.success_states,
152
+ failure_states=self.failure_states,
153
+ waiter_delay=self.poke_interval,
154
+ waiter_max_attempts=self.max_retries,
155
+ aws_conn_id=self.aws_conn_id,
156
+ ),
157
+ method_name="execute_complete",
158
+ )
159
+ else:
160
+ super().execute(context=context)
@@ -20,7 +20,7 @@ from collections.abc import Sequence
20
20
  from functools import cached_property
21
21
  from typing import TYPE_CHECKING
22
22
 
23
- from airflow.exceptions import AirflowNotFoundException
23
+ from airflow.exceptions import AirflowException, AirflowNotFoundException
24
24
  from airflow.providers.amazon.aws.hooks.rds import RdsHook
25
25
  from airflow.providers.amazon.aws.utils.rds import RdsDbType
26
26
  from airflow.sensors.base import BaseSensorOperator
@@ -104,18 +104,17 @@ class RdsExportTaskExistenceSensor(RdsBaseSensor):
104
104
 
105
105
  :param export_task_identifier: A unique identifier for the snapshot export task.
106
106
  :param target_statuses: Target status of export task
107
+ :param error_statuses: Target error status of export task to fail the sensor
107
108
  """
108
109
 
109
- template_fields: Sequence[str] = (
110
- "export_task_identifier",
111
- "target_statuses",
112
- )
110
+ template_fields: Sequence[str] = ("export_task_identifier", "target_statuses", "error_statuses")
113
111
 
114
112
  def __init__(
115
113
  self,
116
114
  *,
117
115
  export_task_identifier: str,
118
116
  target_statuses: list[str] | None = None,
117
+ error_statuses: list[str] | None = None,
119
118
  aws_conn_id: str | None = "aws_default",
120
119
  **kwargs,
121
120
  ):
@@ -129,6 +128,7 @@ class RdsExportTaskExistenceSensor(RdsBaseSensor):
129
128
  "canceling",
130
129
  "canceled",
131
130
  ]
131
+ self.error_statuses = error_statuses or ["failed"]
132
132
 
133
133
  def poke(self, context: Context):
134
134
  self.log.info(
@@ -136,6 +136,11 @@ class RdsExportTaskExistenceSensor(RdsBaseSensor):
136
136
  )
137
137
  try:
138
138
  state = self.hook.get_export_task_state(self.export_task_identifier)
139
+ if state in self.error_statuses:
140
+ raise AirflowException(
141
+ f"Export task {self.export_task_identifier} failed with status {state}"
142
+ )
143
+
139
144
  except AirflowNotFoundException:
140
145
  return False
141
146
  return state in self.target_statuses
@@ -23,7 +23,6 @@ import os
23
23
  import re
24
24
  from collections.abc import Sequence
25
25
  from datetime import datetime, timedelta
26
- from functools import cached_property
27
26
  from typing import TYPE_CHECKING, Any, Callable, cast
28
27
 
29
28
  from airflow.configuration import conf
@@ -34,11 +33,13 @@ if TYPE_CHECKING:
34
33
 
35
34
  from airflow.exceptions import AirflowException
36
35
  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
36
+ from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
37
37
  from airflow.providers.amazon.aws.triggers.s3 import S3KeysUnchangedTrigger, S3KeyTrigger
38
- from airflow.sensors.base import BaseSensorOperator, poke_mode_only
38
+ from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
39
+ from airflow.sensors.base import poke_mode_only
39
40
 
40
41
 
41
- class S3KeySensor(BaseSensorOperator):
42
+ class S3KeySensor(AwsBaseSensor[S3Hook]):
42
43
  """
43
44
  Waits for one or multiple keys (a file-like instance on S3) to be present in a S3 bucket.
44
45
 
@@ -65,17 +66,6 @@ class S3KeySensor(BaseSensorOperator):
65
66
 
66
67
  def check_fn(files: List, **kwargs) -> bool:
67
68
  return any(f.get('Size', 0) > 1048576 for f in files)
68
- :param aws_conn_id: a reference to the s3 connection
69
- :param verify: Whether to verify SSL certificates for S3 connection.
70
- By default, SSL certificates are verified.
71
- You can provide the following values:
72
-
73
- - ``False``: do not validate SSL certificates. SSL will still be used
74
- (unless use_ssl is False), but SSL certificates will not be
75
- verified.
76
- - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
77
- You can specify this argument if you want to use a different
78
- CA cert bundle than the one used by botocore.
79
69
  :param deferrable: Run operator in the deferrable mode
80
70
  :param use_regex: whether to use regex to check bucket
81
71
  :param metadata_keys: List of head_object attributes to gather and send to ``check_fn``.
@@ -83,9 +73,18 @@ class S3KeySensor(BaseSensorOperator):
83
73
  all available attributes.
84
74
  Default value: "Size".
85
75
  If the requested attribute is not found, the key is still included and the value is None.
76
+ :param aws_conn_id: The Airflow connection used for AWS credentials.
77
+ If this is ``None`` or empty then the default boto3 behaviour is used. If
78
+ running Airflow in a distributed manner and aws_conn_id is None or
79
+ empty, then default boto3 configuration would be used (and must be
80
+ maintained on each worker node).
81
+ :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
82
+ :param verify: Whether or not to verify SSL certificates. See:
83
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
86
84
  """
87
85
 
88
- template_fields: Sequence[str] = ("bucket_key", "bucket_name")
86
+ template_fields: Sequence[str] = aws_template_fields("bucket_key", "bucket_name")
87
+ aws_hook_class = S3Hook
89
88
 
90
89
  def __init__(
91
90
  self,
@@ -94,7 +93,6 @@ class S3KeySensor(BaseSensorOperator):
94
93
  bucket_name: str | None = None,
95
94
  wildcard_match: bool = False,
96
95
  check_fn: Callable[..., bool] | None = None,
97
- aws_conn_id: str | None = "aws_default",
98
96
  verify: str | bool | None = None,
99
97
  deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
100
98
  use_regex: bool = False,
@@ -106,14 +104,13 @@ class S3KeySensor(BaseSensorOperator):
106
104
  self.bucket_key = bucket_key
107
105
  self.wildcard_match = wildcard_match
108
106
  self.check_fn = check_fn
109
- self.aws_conn_id = aws_conn_id
110
107
  self.verify = verify
111
108
  self.deferrable = deferrable
112
109
  self.use_regex = use_regex
113
110
  self.metadata_keys = metadata_keys if metadata_keys else ["Size"]
114
111
 
115
112
  def _check_key(self, key, context: Context):
116
- bucket_name, key = S3Hook.get_s3_bucket_key(self.bucket_name, key, "bucket_name", "bucket_key")
113
+ bucket_name, key = self.hook.get_s3_bucket_key(self.bucket_name, key, "bucket_name", "bucket_key")
117
114
  self.log.info("Poking for key : s3://%s/%s", bucket_name, key)
118
115
 
119
116
  """
@@ -199,7 +196,9 @@ class S3KeySensor(BaseSensorOperator):
199
196
  bucket_key=self.bucket_key,
200
197
  wildcard_match=self.wildcard_match,
201
198
  aws_conn_id=self.aws_conn_id,
199
+ region_name=self.region_name,
202
200
  verify=self.verify,
201
+ botocore_config=self.botocore_config,
203
202
  poke_interval=self.poke_interval,
204
203
  should_check_fn=bool(self.check_fn),
205
204
  use_regex=self.use_regex,
@@ -220,13 +219,9 @@ class S3KeySensor(BaseSensorOperator):
220
219
  elif event["status"] == "error":
221
220
  raise AirflowException(event["message"])
222
221
 
223
- @cached_property
224
- def hook(self) -> S3Hook:
225
- return S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
226
-
227
222
 
228
223
  @poke_mode_only
229
- class S3KeysUnchangedSensor(BaseSensorOperator):
224
+ class S3KeysUnchangedSensor(AwsBaseSensor[S3Hook]):
230
225
  """
231
226
  Return True if inactivity_period has passed with no increase in the number of objects matching prefix.
232
227
 
@@ -239,17 +234,7 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
239
234
 
240
235
  :param bucket_name: Name of the S3 bucket
241
236
  :param prefix: The prefix being waited on. Relative path from bucket root level.
242
- :param aws_conn_id: a reference to the s3 connection
243
- :param verify: Whether or not to verify SSL certificates for S3 connection.
244
- By default SSL certificates are verified.
245
- You can provide the following values:
246
-
247
- - ``False``: do not validate SSL certificates. SSL will still be used
248
- (unless use_ssl is False), but SSL certificates will not be
249
- verified.
250
- - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
251
- You can specify this argument if you want to use a different
252
- CA cert bundle than the one used by botocore.
237
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
253
238
  :param inactivity_period: The total seconds of inactivity to designate
254
239
  keys unchanged. Note, this mechanism is not real time and
255
240
  this operator may not return until a poke_interval after this period
@@ -261,16 +246,24 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
261
246
  between pokes valid behavior. If true a warning message will be logged
262
247
  when this happens. If false an error will be raised.
263
248
  :param deferrable: Run sensor in the deferrable mode
249
+ :param aws_conn_id: The Airflow connection used for AWS credentials.
250
+ If this is ``None`` or empty then the default boto3 behaviour is used. If
251
+ running Airflow in a distributed manner and aws_conn_id is None or
252
+ empty, then default boto3 configuration would be used (and must be
253
+ maintained on each worker node).
254
+ :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
255
+ :param verify: Whether or not to verify SSL certificates. See:
256
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
264
257
  """
265
258
 
266
- template_fields: Sequence[str] = ("bucket_name", "prefix")
259
+ template_fields: Sequence[str] = aws_template_fields("bucket_name", "prefix")
260
+ aws_hook_class = S3Hook
267
261
 
268
262
  def __init__(
269
263
  self,
270
264
  *,
271
265
  bucket_name: str,
272
266
  prefix: str,
273
- aws_conn_id: str | None = "aws_default",
274
267
  verify: bool | str | None = None,
275
268
  inactivity_period: float = 60 * 60,
276
269
  min_objects: int = 1,
@@ -291,15 +284,9 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
291
284
  self.inactivity_seconds = 0
292
285
  self.allow_delete = allow_delete
293
286
  self.deferrable = deferrable
294
- self.aws_conn_id = aws_conn_id
295
287
  self.verify = verify
296
288
  self.last_activity_time: datetime | None = None
297
289
 
298
- @cached_property
299
- def hook(self):
300
- """Returns S3Hook."""
301
- return S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
302
-
303
290
  def is_keys_unchanged(self, current_objects: set[str]) -> bool:
304
291
  """
305
292
  Check for new objects after the inactivity_period and update the sensor state accordingly.
@@ -382,7 +369,9 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
382
369
  inactivity_seconds=self.inactivity_seconds,
383
370
  allow_delete=self.allow_delete,
384
371
  aws_conn_id=self.aws_conn_id,
372
+ region_name=self.region_name,
385
373
  verify=self.verify,
374
+ botocore_config=self.botocore_config,
386
375
  last_activity_time=self.last_activity_time,
387
376
  ),
388
377
  method_name="execute_complete",