apache-airflow-providers-amazon 9.4.0rc1__py3-none-any.whl → 9.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/amazon/__init__.py +1 -1
- airflow/providers/amazon/aws/auth_manager/avp/entities.py +3 -1
- airflow/providers/amazon/aws/auth_manager/avp/facade.py +1 -1
- airflow/providers/amazon/aws/auth_manager/aws_auth_manager.py +80 -110
- airflow/providers/amazon/aws/auth_manager/router/login.py +11 -4
- airflow/providers/amazon/aws/auth_manager/user.py +7 -4
- airflow/providers/amazon/aws/executors/ecs/ecs_executor.py +1 -1
- airflow/providers/amazon/aws/hooks/appflow.py +5 -15
- airflow/providers/amazon/aws/hooks/athena_sql.py +2 -2
- airflow/providers/amazon/aws/hooks/base_aws.py +34 -1
- airflow/providers/amazon/aws/hooks/batch_client.py +1 -2
- airflow/providers/amazon/aws/hooks/batch_waiters.py +11 -3
- airflow/providers/amazon/aws/hooks/dms.py +3 -1
- airflow/providers/amazon/aws/hooks/ec2.py +1 -1
- airflow/providers/amazon/aws/hooks/eks.py +3 -6
- airflow/providers/amazon/aws/hooks/glue.py +6 -2
- airflow/providers/amazon/aws/hooks/logs.py +2 -2
- airflow/providers/amazon/aws/hooks/mwaa.py +79 -15
- airflow/providers/amazon/aws/hooks/redshift_cluster.py +10 -10
- airflow/providers/amazon/aws/hooks/redshift_data.py +3 -4
- airflow/providers/amazon/aws/hooks/s3.py +3 -1
- airflow/providers/amazon/aws/hooks/sagemaker.py +2 -2
- airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py +188 -0
- airflow/providers/amazon/aws/links/athena.py +1 -2
- airflow/providers/amazon/aws/links/base_aws.py +8 -1
- airflow/providers/amazon/aws/links/sagemaker_unified_studio.py +27 -0
- airflow/providers/amazon/aws/log/cloudwatch_task_handler.py +174 -54
- airflow/providers/amazon/aws/log/s3_task_handler.py +136 -84
- airflow/providers/amazon/aws/notifications/chime.py +1 -2
- airflow/providers/amazon/aws/notifications/sns.py +1 -1
- airflow/providers/amazon/aws/notifications/sqs.py +1 -1
- airflow/providers/amazon/aws/operators/ec2.py +91 -83
- airflow/providers/amazon/aws/operators/eks.py +3 -3
- airflow/providers/amazon/aws/operators/mwaa.py +73 -2
- airflow/providers/amazon/aws/operators/redshift_cluster.py +10 -3
- airflow/providers/amazon/aws/operators/s3.py +147 -157
- airflow/providers/amazon/aws/operators/sagemaker.py +4 -7
- airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py +155 -0
- airflow/providers/amazon/aws/sensors/ec2.py +5 -12
- airflow/providers/amazon/aws/sensors/emr.py +1 -1
- airflow/providers/amazon/aws/sensors/glacier.py +1 -1
- airflow/providers/amazon/aws/sensors/mwaa.py +161 -0
- airflow/providers/amazon/aws/sensors/rds.py +10 -5
- airflow/providers/amazon/aws/sensors/s3.py +32 -43
- airflow/providers/amazon/aws/sensors/sagemaker_unified_studio.py +73 -0
- airflow/providers/amazon/aws/sensors/step_function.py +2 -1
- airflow/providers/amazon/aws/transfers/mongo_to_s3.py +2 -2
- airflow/providers/amazon/aws/transfers/redshift_to_s3.py +19 -4
- airflow/providers/amazon/aws/transfers/s3_to_redshift.py +19 -3
- airflow/providers/amazon/aws/transfers/sql_to_s3.py +1 -1
- airflow/providers/amazon/aws/triggers/README.md +4 -4
- airflow/providers/amazon/aws/triggers/base.py +11 -2
- airflow/providers/amazon/aws/triggers/ecs.py +6 -2
- airflow/providers/amazon/aws/triggers/eks.py +2 -2
- airflow/providers/amazon/aws/triggers/glue.py +1 -1
- airflow/providers/amazon/aws/triggers/mwaa.py +128 -0
- airflow/providers/amazon/aws/triggers/s3.py +31 -6
- airflow/providers/amazon/aws/triggers/sagemaker.py +2 -2
- airflow/providers/amazon/aws/triggers/sagemaker_unified_studio.py +66 -0
- airflow/providers/amazon/aws/triggers/sqs.py +11 -3
- airflow/providers/amazon/aws/{auth_manager/security_manager/__init__.py → utils/sagemaker_unified_studio.py} +12 -0
- airflow/providers/amazon/aws/utils/waiter_with_logging.py +4 -3
- airflow/providers/amazon/aws/waiters/mwaa.json +36 -0
- airflow/providers/amazon/get_provider_info.py +46 -5
- {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0.dist-info}/METADATA +40 -33
- {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0.dist-info}/RECORD +68 -61
- {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0.dist-info}/WHEEL +1 -1
- airflow/providers/amazon/aws/auth_manager/security_manager/aws_security_manager_override.py +0 -40
- {apache_airflow_providers_amazon-9.4.0rc1.dist-info → apache_airflow_providers_amazon-9.5.0.dist-info}/entry_points.txt +0 -0
@@ -44,7 +44,6 @@ from airflow.providers.amazon.aws.utils import trim_none_values, validate_execut
|
|
44
44
|
from airflow.providers.amazon.aws.utils.sagemaker import ApprovalStatus
|
45
45
|
from airflow.providers.amazon.aws.utils.tags import format_tags
|
46
46
|
from airflow.utils.helpers import prune_dict
|
47
|
-
from airflow.utils.json import AirflowJsonEncoder
|
48
47
|
|
49
48
|
if TYPE_CHECKING:
|
50
49
|
from airflow.providers.common.compat.openlineage.facet import Dataset
|
@@ -56,7 +55,7 @@ CHECK_INTERVAL_SECOND: int = 30
|
|
56
55
|
|
57
56
|
|
58
57
|
def serialize(result: dict) -> dict:
|
59
|
-
return json.loads(json.dumps(result, cls=AirflowJsonEncoder))
|
58
|
+
return json.loads(json.dumps(result, default=repr))
|
60
59
|
|
61
60
|
|
62
61
|
class SageMakerBaseOperator(BaseOperator):
|
@@ -171,7 +170,7 @@ class SageMakerBaseOperator(BaseOperator):
|
|
171
170
|
timestamp = str(
|
172
171
|
time.time_ns() // 1000000000
|
173
172
|
) # only keep the relevant datetime (first 10 digits)
|
174
|
-
name = f"{proposed_name[:max_name_len - len(timestamp) - 1]}-{timestamp}" # we subtract one to make provision for the dash between the truncated name and timestamp
|
173
|
+
name = f"{proposed_name[: max_name_len - len(timestamp) - 1]}-{timestamp}" # we subtract one to make provision for the dash between the truncated name and timestamp
|
175
174
|
self.log.info("Changed %s name to '%s' to avoid collision.", resource_type, name)
|
176
175
|
return name
|
177
176
|
|
@@ -179,8 +178,7 @@ class SageMakerBaseOperator(BaseOperator):
|
|
179
178
|
"""Raise exception if resource type is not 'model' or 'job'."""
|
180
179
|
if resource_type not in ("model", "job"):
|
181
180
|
raise AirflowException(
|
182
|
-
"Argument resource_type accepts only 'model' and 'job'. "
|
183
|
-
f"Provided value: '{resource_type}'."
|
181
|
+
f"Argument resource_type accepts only 'model' and 'job'. Provided value: '{resource_type}'."
|
184
182
|
)
|
185
183
|
|
186
184
|
def _check_if_job_exists(self, job_name: str, describe_func: Callable[[str], Any]) -> bool:
|
@@ -560,8 +558,7 @@ class SageMakerEndpointOperator(SageMakerBaseOperator):
|
|
560
558
|
self.operation = "update"
|
561
559
|
sagemaker_operation = self.hook.update_endpoint
|
562
560
|
self.log.warning(
|
563
|
-
"cannot create already existing endpoint %s, "
|
564
|
-
"updating it with the given config instead",
|
561
|
+
"cannot create already existing endpoint %s, updating it with the given config instead",
|
565
562
|
endpoint_info["EndpointName"],
|
566
563
|
)
|
567
564
|
if "Tags" in endpoint_info:
|
@@ -0,0 +1,155 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
"""This module contains the Amazon SageMaker Unified Studio Notebook operator."""
|
19
|
+
|
20
|
+
from __future__ import annotations
|
21
|
+
|
22
|
+
from functools import cached_property
|
23
|
+
from typing import TYPE_CHECKING
|
24
|
+
|
25
|
+
from airflow.configuration import conf
|
26
|
+
from airflow.exceptions import AirflowException
|
27
|
+
from airflow.models import BaseOperator
|
28
|
+
from airflow.providers.amazon.aws.hooks.sagemaker_unified_studio import (
|
29
|
+
SageMakerNotebookHook,
|
30
|
+
)
|
31
|
+
from airflow.providers.amazon.aws.links.sagemaker_unified_studio import (
|
32
|
+
SageMakerUnifiedStudioLink,
|
33
|
+
)
|
34
|
+
from airflow.providers.amazon.aws.triggers.sagemaker_unified_studio import (
|
35
|
+
SageMakerNotebookJobTrigger,
|
36
|
+
)
|
37
|
+
|
38
|
+
if TYPE_CHECKING:
|
39
|
+
from airflow.utils.context import Context
|
40
|
+
|
41
|
+
|
42
|
+
class SageMakerNotebookOperator(BaseOperator):
|
43
|
+
"""
|
44
|
+
Provides Artifact execution functionality for Sagemaker Unified Studio Workflows.
|
45
|
+
|
46
|
+
Examples:
|
47
|
+
.. code-block:: python
|
48
|
+
|
49
|
+
from airflow.providers.amazon.aws.operators.sagemaker_unified_studio import SageMakerNotebookOperator
|
50
|
+
|
51
|
+
notebook_operator = SageMakerNotebookOperator(
|
52
|
+
task_id="notebook_task",
|
53
|
+
input_config={"input_path": "path/to/notebook.ipynb", "input_params": ""},
|
54
|
+
output_config={"output_format": "ipynb"},
|
55
|
+
wait_for_completion=True,
|
56
|
+
waiter_delay=10,
|
57
|
+
waiter_max_attempts=1440,
|
58
|
+
)
|
59
|
+
|
60
|
+
:param task_id: A unique, meaningful id for the task.
|
61
|
+
:param input_config: Configuration for the input file. Input path should be specified as a relative path.
|
62
|
+
The provided relative path will be automatically resolved to an absolute path within
|
63
|
+
the context of the user's home directory in the IDE. Input params should be a dict.
|
64
|
+
Example: {'input_path': 'folder/input/notebook.ipynb', 'input_params':{'key': 'value'}}
|
65
|
+
:param output_config: Configuration for the output format. It should include an output_format parameter to control
|
66
|
+
the format of the notebook execution output.
|
67
|
+
Example: {"output_formats": ["NOTEBOOK"]}
|
68
|
+
:param compute: compute configuration to use for the artifact execution. This is a required attribute
|
69
|
+
if the execution is on a remote compute.
|
70
|
+
Example: { "InstanceType": "ml.m5.large", "VolumeSizeInGB": 30, "VolumeKmsKeyId": "", "ImageUri": "string", "ContainerEntrypoint": [ "string" ]}
|
71
|
+
:param termination_condition: conditions to match to terminate the remote execution.
|
72
|
+
Example: { "MaxRuntimeInSeconds": 3600 }
|
73
|
+
:param tags: tags to be associated with the remote execution runs.
|
74
|
+
Example: { "md_analytics": "logs" }
|
75
|
+
:param wait_for_completion: Indicates whether to wait for the notebook execution to complete. If True, wait for completion; if False, don't wait.
|
76
|
+
:param waiter_delay: Interval in seconds to check the notebook execution status.
|
77
|
+
:param waiter_max_attempts: Number of attempts to wait before returning FAILED.
|
78
|
+
:param deferrable: If True, the operator will wait asynchronously for the job to complete.
|
79
|
+
This implies waiting for completion. This mode requires aiobotocore module to be installed.
|
80
|
+
(default: False)
|
81
|
+
|
82
|
+
.. seealso::
|
83
|
+
For more information on how to use this operator, take a look at the guide:
|
84
|
+
:ref:`howto/operator:SageMakerNotebookOperator`
|
85
|
+
"""
|
86
|
+
|
87
|
+
operator_extra_links = (SageMakerUnifiedStudioLink(),)
|
88
|
+
|
89
|
+
def __init__(
|
90
|
+
self,
|
91
|
+
task_id: str,
|
92
|
+
input_config: dict,
|
93
|
+
output_config: dict | None = None,
|
94
|
+
compute: dict | None = None,
|
95
|
+
termination_condition: dict | None = None,
|
96
|
+
tags: dict | None = None,
|
97
|
+
wait_for_completion: bool = True,
|
98
|
+
waiter_delay: int = 10,
|
99
|
+
waiter_max_attempts: int = 1440,
|
100
|
+
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
101
|
+
**kwargs,
|
102
|
+
):
|
103
|
+
super().__init__(task_id=task_id, **kwargs)
|
104
|
+
self.execution_name = task_id
|
105
|
+
self.input_config = input_config
|
106
|
+
self.output_config = output_config or {"output_formats": ["NOTEBOOK"]}
|
107
|
+
self.compute = compute or {}
|
108
|
+
self.termination_condition = termination_condition or {}
|
109
|
+
self.tags = tags or {}
|
110
|
+
self.wait_for_completion = wait_for_completion
|
111
|
+
self.waiter_delay = waiter_delay
|
112
|
+
self.waiter_max_attempts = waiter_max_attempts
|
113
|
+
self.deferrable = deferrable
|
114
|
+
self.input_kwargs = kwargs
|
115
|
+
|
116
|
+
@cached_property
|
117
|
+
def notebook_execution_hook(self):
|
118
|
+
if not self.input_config:
|
119
|
+
raise AirflowException("input_config is required")
|
120
|
+
|
121
|
+
if "input_path" not in self.input_config:
|
122
|
+
raise AirflowException("input_path is a required field in the input_config")
|
123
|
+
|
124
|
+
return SageMakerNotebookHook(
|
125
|
+
input_config=self.input_config,
|
126
|
+
output_config=self.output_config,
|
127
|
+
execution_name=self.execution_name,
|
128
|
+
compute=self.compute,
|
129
|
+
termination_condition=self.termination_condition,
|
130
|
+
tags=self.tags,
|
131
|
+
waiter_delay=self.waiter_delay,
|
132
|
+
waiter_max_attempts=self.waiter_max_attempts,
|
133
|
+
)
|
134
|
+
|
135
|
+
def execute(self, context: Context):
|
136
|
+
notebook_execution = self.notebook_execution_hook.start_notebook_execution()
|
137
|
+
execution_id = notebook_execution["execution_id"]
|
138
|
+
|
139
|
+
if self.deferrable:
|
140
|
+
self.defer(
|
141
|
+
trigger=SageMakerNotebookJobTrigger(
|
142
|
+
execution_id=execution_id,
|
143
|
+
execution_name=self.execution_name,
|
144
|
+
waiter_delay=self.waiter_delay,
|
145
|
+
waiter_max_attempts=self.waiter_max_attempts,
|
146
|
+
),
|
147
|
+
method_name="execute_complete",
|
148
|
+
)
|
149
|
+
elif self.wait_for_completion:
|
150
|
+
response = self.notebook_execution_hook.wait_for_execution_completion(execution_id, context)
|
151
|
+
status = response["Status"]
|
152
|
+
log_info_message = (
|
153
|
+
f"Notebook Execution: {self.execution_name} Status: {status}. Run Id: {execution_id}"
|
154
|
+
)
|
155
|
+
self.log.info(log_info_message)
|
@@ -18,21 +18,21 @@
|
|
18
18
|
from __future__ import annotations
|
19
19
|
|
20
20
|
from collections.abc import Sequence
|
21
|
-
from functools import cached_property
|
22
21
|
from typing import TYPE_CHECKING, Any
|
23
22
|
|
24
23
|
from airflow.configuration import conf
|
25
24
|
from airflow.exceptions import AirflowException
|
26
25
|
from airflow.providers.amazon.aws.hooks.ec2 import EC2Hook
|
26
|
+
from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
|
27
27
|
from airflow.providers.amazon.aws.triggers.ec2 import EC2StateSensorTrigger
|
28
28
|
from airflow.providers.amazon.aws.utils import validate_execute_complete_event
|
29
|
-
from airflow.sensors.base import BaseSensorOperator
|
29
|
+
from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
|
30
30
|
|
31
31
|
if TYPE_CHECKING:
|
32
32
|
from airflow.utils.context import Context
|
33
33
|
|
34
34
|
|
35
|
-
class EC2InstanceStateSensor(BaseSensorOperator):
|
35
|
+
class EC2InstanceStateSensor(AwsBaseSensor[EC2Hook]):
|
36
36
|
"""
|
37
37
|
Poll the state of the AWS EC2 instance until the instance reaches the target state.
|
38
38
|
|
@@ -46,7 +46,8 @@ class EC2InstanceStateSensor(BaseSensorOperator):
|
|
46
46
|
:param deferrable: if True, the sensor will run in deferrable mode
|
47
47
|
"""
|
48
48
|
|
49
|
-
|
49
|
+
aws_hook_class = EC2Hook
|
50
|
+
template_fields: Sequence[str] = aws_template_fields("target_state", "instance_id", "region_name")
|
50
51
|
ui_color = "#cc8811"
|
51
52
|
ui_fgcolor = "#ffffff"
|
52
53
|
valid_states = ["running", "stopped", "terminated"]
|
@@ -56,8 +57,6 @@ class EC2InstanceStateSensor(BaseSensorOperator):
|
|
56
57
|
*,
|
57
58
|
target_state: str,
|
58
59
|
instance_id: str,
|
59
|
-
aws_conn_id: str | None = "aws_default",
|
60
|
-
region_name: str | None = None,
|
61
60
|
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
62
61
|
**kwargs,
|
63
62
|
):
|
@@ -66,8 +65,6 @@ class EC2InstanceStateSensor(BaseSensorOperator):
|
|
66
65
|
super().__init__(**kwargs)
|
67
66
|
self.target_state = target_state
|
68
67
|
self.instance_id = instance_id
|
69
|
-
self.aws_conn_id = aws_conn_id
|
70
|
-
self.region_name = region_name
|
71
68
|
self.deferrable = deferrable
|
72
69
|
|
73
70
|
def execute(self, context: Context) -> Any:
|
@@ -85,10 +82,6 @@ class EC2InstanceStateSensor(BaseSensorOperator):
|
|
85
82
|
else:
|
86
83
|
super().execute(context=context)
|
87
84
|
|
88
|
-
@cached_property
|
89
|
-
def hook(self):
|
90
|
-
return EC2Hook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)
|
91
|
-
|
92
85
|
def poke(self, context: Context):
|
93
86
|
instance_state = self.hook.get_instance_state(instance_id=self.instance_id)
|
94
87
|
self.log.info("instance state: %s", instance_state)
|
@@ -311,7 +311,7 @@ class EmrContainerSensor(BaseSensorOperator):
|
|
311
311
|
)
|
312
312
|
|
313
313
|
if state in self.FAILURE_STATES:
|
314
|
-
raise AirflowException("EMR Containers sensor failed")
|
314
|
+
raise AirflowException(f"EMR Containers sensor failed due to state: {state}")
|
315
315
|
|
316
316
|
if state in self.INTERMEDIATE_STATES:
|
317
317
|
return False
|
@@ -95,5 +95,5 @@ class GlacierJobOperationSensor(AwsBaseSensor[GlacierHook]):
|
|
95
95
|
return False
|
96
96
|
else:
|
97
97
|
raise AirflowException(
|
98
|
-
f'Sensor failed. Job status: {response["Action"]}, code status: {response["StatusCode"]}'
|
98
|
+
f"Sensor failed. Job status: {response['Action']}, code status: {response['StatusCode']}"
|
99
99
|
)
|
@@ -0,0 +1,161 @@
|
|
1
|
+
#
|
2
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
# or more contributor license agreements. See the NOTICE file
|
4
|
+
# distributed with this work for additional information
|
5
|
+
# regarding copyright ownership. The ASF licenses this file
|
6
|
+
# to you under the Apache License, Version 2.0 (the
|
7
|
+
# "License"); you may not use this file except in compliance
|
8
|
+
# with the License. You may obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing,
|
13
|
+
# software distributed under the License is distributed on an
|
14
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
# KIND, either express or implied. See the License for the
|
16
|
+
# specific language governing permissions and limitations
|
17
|
+
# under the License.
|
18
|
+
from __future__ import annotations
|
19
|
+
|
20
|
+
from collections.abc import Collection, Sequence
|
21
|
+
from typing import TYPE_CHECKING, Any
|
22
|
+
|
23
|
+
from airflow.configuration import conf
|
24
|
+
from airflow.exceptions import AirflowException
|
25
|
+
from airflow.providers.amazon.aws.hooks.mwaa import MwaaHook
|
26
|
+
from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
|
27
|
+
from airflow.providers.amazon.aws.triggers.mwaa import MwaaDagRunCompletedTrigger
|
28
|
+
from airflow.providers.amazon.aws.utils import validate_execute_complete_event
|
29
|
+
from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
|
30
|
+
from airflow.utils.state import DagRunState
|
31
|
+
|
32
|
+
if TYPE_CHECKING:
|
33
|
+
from airflow.utils.context import Context
|
34
|
+
|
35
|
+
|
36
|
+
class MwaaDagRunSensor(AwsBaseSensor[MwaaHook]):
|
37
|
+
"""
|
38
|
+
Waits for a DAG Run in an MWAA Environment to complete.
|
39
|
+
|
40
|
+
If the DAG Run fails, an AirflowException is thrown.
|
41
|
+
|
42
|
+
.. seealso::
|
43
|
+
For more information on how to use this sensor, take a look at the guide:
|
44
|
+
:ref:`howto/sensor:MwaaDagRunSensor`
|
45
|
+
|
46
|
+
:param external_env_name: The external MWAA environment name that contains the DAG Run you want to wait for
|
47
|
+
(templated)
|
48
|
+
:param external_dag_id: The DAG ID in the external MWAA environment that contains the DAG Run you want to wait for
|
49
|
+
(templated)
|
50
|
+
:param external_dag_run_id: The DAG Run ID in the external MWAA environment that you want to wait for (templated)
|
51
|
+
:param success_states: Collection of DAG Run states that would make this task marked as successful, default is
|
52
|
+
``{airflow.utils.state.DagRunState.SUCCESS}`` (templated)
|
53
|
+
:param failure_states: Collection of DAG Run states that would make this task marked as failed and raise an
|
54
|
+
AirflowException, default is ``{airflow.utils.state.DagRunState.FAILED}`` (templated)
|
55
|
+
:param deferrable: If True, the sensor will operate in deferrable mode. This mode requires aiobotocore
|
56
|
+
module to be installed.
|
57
|
+
(default: False, but can be overridden in config file by setting default_deferrable to True)
|
58
|
+
:param poke_interval: Polling period in seconds to check for the status of the job. (default: 60)
|
59
|
+
:param max_retries: Number of times before returning the current state. (default: 720)
|
60
|
+
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
61
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
62
|
+
running Airflow in a distributed manner and aws_conn_id is None or
|
63
|
+
empty, then default boto3 configuration would be used (and must be
|
64
|
+
maintained on each worker node).
|
65
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
66
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
67
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
68
|
+
:param botocore_config: Configuration dictionary (key-values) for botocore client. See:
|
69
|
+
https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
|
70
|
+
"""
|
71
|
+
|
72
|
+
aws_hook_class = MwaaHook
|
73
|
+
template_fields: Sequence[str] = aws_template_fields(
|
74
|
+
"external_env_name",
|
75
|
+
"external_dag_id",
|
76
|
+
"external_dag_run_id",
|
77
|
+
"success_states",
|
78
|
+
"failure_states",
|
79
|
+
"deferrable",
|
80
|
+
"max_retries",
|
81
|
+
"poke_interval",
|
82
|
+
)
|
83
|
+
|
84
|
+
def __init__(
|
85
|
+
self,
|
86
|
+
*,
|
87
|
+
external_env_name: str,
|
88
|
+
external_dag_id: str,
|
89
|
+
external_dag_run_id: str,
|
90
|
+
success_states: Collection[str] | None = None,
|
91
|
+
failure_states: Collection[str] | None = None,
|
92
|
+
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
93
|
+
poke_interval: int = 60,
|
94
|
+
max_retries: int = 720,
|
95
|
+
**kwargs,
|
96
|
+
):
|
97
|
+
super().__init__(**kwargs)
|
98
|
+
|
99
|
+
self.success_states = set(success_states) if success_states else {DagRunState.SUCCESS.value}
|
100
|
+
self.failure_states = set(failure_states) if failure_states else {DagRunState.FAILED.value}
|
101
|
+
|
102
|
+
if len(self.success_states & self.failure_states):
|
103
|
+
raise ValueError("success_states and failure_states must not have any values in common")
|
104
|
+
|
105
|
+
self.external_env_name = external_env_name
|
106
|
+
self.external_dag_id = external_dag_id
|
107
|
+
self.external_dag_run_id = external_dag_run_id
|
108
|
+
self.deferrable = deferrable
|
109
|
+
self.poke_interval = poke_interval
|
110
|
+
self.max_retries = max_retries
|
111
|
+
|
112
|
+
def poke(self, context: Context) -> bool:
|
113
|
+
self.log.info(
|
114
|
+
"Poking for DAG run %s of DAG %s in MWAA environment %s",
|
115
|
+
self.external_dag_run_id,
|
116
|
+
self.external_dag_id,
|
117
|
+
self.external_env_name,
|
118
|
+
)
|
119
|
+
response = self.hook.invoke_rest_api(
|
120
|
+
env_name=self.external_env_name,
|
121
|
+
path=f"/dags/{self.external_dag_id}/dagRuns/{self.external_dag_run_id}",
|
122
|
+
method="GET",
|
123
|
+
)
|
124
|
+
|
125
|
+
# If RestApiStatusCode == 200, the RestApiResponse must have the "state" key, otherwise something terrible has
|
126
|
+
# happened in the API and KeyError would be raised
|
127
|
+
# If RestApiStatusCode >= 300, a botocore exception would've already been raised during the
|
128
|
+
# self.hook.invoke_rest_api call
|
129
|
+
# The scope of this sensor is going to only be raising AirflowException due to failure of the DAGRun
|
130
|
+
|
131
|
+
state = response["RestApiResponse"]["state"]
|
132
|
+
|
133
|
+
if state in self.failure_states:
|
134
|
+
raise AirflowException(
|
135
|
+
f"The DAG run {self.external_dag_run_id} of DAG {self.external_dag_id} in MWAA environment {self.external_env_name} "
|
136
|
+
f"failed with state: {state}"
|
137
|
+
)
|
138
|
+
|
139
|
+
return state in self.success_states
|
140
|
+
|
141
|
+
def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
|
142
|
+
validate_execute_complete_event(event)
|
143
|
+
|
144
|
+
def execute(self, context: Context):
|
145
|
+
if self.deferrable:
|
146
|
+
self.defer(
|
147
|
+
trigger=MwaaDagRunCompletedTrigger(
|
148
|
+
external_env_name=self.external_env_name,
|
149
|
+
external_dag_id=self.external_dag_id,
|
150
|
+
external_dag_run_id=self.external_dag_run_id,
|
151
|
+
success_states=self.success_states,
|
152
|
+
failure_states=self.failure_states,
|
153
|
+
# somehow the type of poke_interval is derived as float ??
|
154
|
+
waiter_delay=self.poke_interval, # type: ignore[arg-type]
|
155
|
+
waiter_max_attempts=self.max_retries,
|
156
|
+
aws_conn_id=self.aws_conn_id,
|
157
|
+
),
|
158
|
+
method_name="execute_complete",
|
159
|
+
)
|
160
|
+
else:
|
161
|
+
super().execute(context=context)
|
@@ -20,7 +20,7 @@ from collections.abc import Sequence
|
|
20
20
|
from functools import cached_property
|
21
21
|
from typing import TYPE_CHECKING
|
22
22
|
|
23
|
-
from airflow.exceptions import AirflowNotFoundException
|
23
|
+
from airflow.exceptions import AirflowException, AirflowNotFoundException
|
24
24
|
from airflow.providers.amazon.aws.hooks.rds import RdsHook
|
25
25
|
from airflow.providers.amazon.aws.utils.rds import RdsDbType
|
26
26
|
from airflow.sensors.base import BaseSensorOperator
|
@@ -104,18 +104,17 @@ class RdsExportTaskExistenceSensor(RdsBaseSensor):
|
|
104
104
|
|
105
105
|
:param export_task_identifier: A unique identifier for the snapshot export task.
|
106
106
|
:param target_statuses: Target status of export task
|
107
|
+
:param error_statuses: Target error status of export task to fail the sensor
|
107
108
|
"""
|
108
109
|
|
109
|
-
template_fields: Sequence[str] = (
|
110
|
-
"export_task_identifier",
|
111
|
-
"target_statuses",
|
112
|
-
)
|
110
|
+
template_fields: Sequence[str] = ("export_task_identifier", "target_statuses", "error_statuses")
|
113
111
|
|
114
112
|
def __init__(
|
115
113
|
self,
|
116
114
|
*,
|
117
115
|
export_task_identifier: str,
|
118
116
|
target_statuses: list[str] | None = None,
|
117
|
+
error_statuses: list[str] | None = None,
|
119
118
|
aws_conn_id: str | None = "aws_default",
|
120
119
|
**kwargs,
|
121
120
|
):
|
@@ -129,6 +128,7 @@ class RdsExportTaskExistenceSensor(RdsBaseSensor):
|
|
129
128
|
"canceling",
|
130
129
|
"canceled",
|
131
130
|
]
|
131
|
+
self.error_statuses = error_statuses or ["failed"]
|
132
132
|
|
133
133
|
def poke(self, context: Context):
|
134
134
|
self.log.info(
|
@@ -136,6 +136,11 @@ class RdsExportTaskExistenceSensor(RdsBaseSensor):
|
|
136
136
|
)
|
137
137
|
try:
|
138
138
|
state = self.hook.get_export_task_state(self.export_task_identifier)
|
139
|
+
if state in self.error_statuses:
|
140
|
+
raise AirflowException(
|
141
|
+
f"Export task {self.export_task_identifier} failed with status {state}"
|
142
|
+
)
|
143
|
+
|
139
144
|
except AirflowNotFoundException:
|
140
145
|
return False
|
141
146
|
return state in self.target_statuses
|