apache-airflow-providers-amazon 8.24.0rc1__py3-none-any.whl → 8.25.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/amazon/LICENSE +4 -4
- airflow/providers/amazon/__init__.py +1 -1
- airflow/providers/amazon/aws/hooks/base_aws.py +8 -3
- airflow/providers/amazon/aws/hooks/comprehend.py +33 -0
- airflow/providers/amazon/aws/hooks/glue.py +123 -0
- airflow/providers/amazon/aws/hooks/redshift_sql.py +8 -1
- airflow/providers/amazon/aws/operators/bedrock.py +6 -20
- airflow/providers/amazon/aws/operators/comprehend.py +148 -1
- airflow/providers/amazon/aws/operators/emr.py +38 -30
- airflow/providers/amazon/aws/operators/glue.py +408 -2
- airflow/providers/amazon/aws/operators/sagemaker.py +85 -12
- airflow/providers/amazon/aws/sensors/comprehend.py +112 -1
- airflow/providers/amazon/aws/sensors/glue.py +260 -2
- airflow/providers/amazon/aws/sensors/s3.py +35 -5
- airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py +0 -1
- airflow/providers/amazon/aws/transfers/redshift_to_s3.py +1 -1
- airflow/providers/amazon/aws/triggers/comprehend.py +36 -0
- airflow/providers/amazon/aws/triggers/glue.py +76 -2
- airflow/providers/amazon/aws/utils/__init__.py +2 -3
- airflow/providers/amazon/aws/waiters/comprehend.json +55 -0
- airflow/providers/amazon/aws/waiters/glue.json +98 -0
- airflow/providers/amazon/get_provider_info.py +20 -13
- {apache_airflow_providers_amazon-8.24.0rc1.dist-info → apache_airflow_providers_amazon-8.25.0.dist-info}/METADATA +22 -21
- {apache_airflow_providers_amazon-8.24.0rc1.dist-info → apache_airflow_providers_amazon-8.25.0.dist-info}/RECORD +26 -26
- {apache_airflow_providers_amazon-8.24.0rc1.dist-info → apache_airflow_providers_amazon-8.25.0.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_amazon-8.24.0rc1.dist-info → apache_airflow_providers_amazon-8.25.0.dist-info}/entry_points.txt +0 -0
airflow/providers/amazon/aws/sensors/comprehend.py (+112 -1)

@@ -23,7 +23,10 @@ from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowSkipException
 from airflow.providers.amazon.aws.hooks.comprehend import ComprehendHook
 from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
-from airflow.providers.amazon.aws.triggers.comprehend import
+from airflow.providers.amazon.aws.triggers.comprehend import (
+    ComprehendCreateDocumentClassifierCompletedTrigger,
+    ComprehendPiiEntitiesDetectionJobCompletedTrigger,
+)
 from airflow.providers.amazon.aws.utils.mixins import aws_template_fields

 if TYPE_CHECKING:
@@ -145,3 +148,111 @@ class ComprehendStartPiiEntitiesDetectionJobCompletedSensor(ComprehendBaseSensor
         return self.hook.conn.describe_pii_entities_detection_job(JobId=self.job_id)[
             "PiiEntitiesDetectionJobProperties"
         ]["JobStatus"]
+
+
+class ComprehendCreateDocumentClassifierCompletedSensor(AwsBaseSensor[ComprehendHook]):
+    """
+    Poll the state of the document classifier until it reaches a completed state; fails if the job fails.
+
+    .. seealso::
+        For more information on how to use this sensor, take a look at the guide:
+        :ref:`howto/sensor:ComprehendCreateDocumentClassifierCompletedSensor`
+
+    :param document_classifier_arn: The arn of the Comprehend document classifier.
+    :param fail_on_warnings: If set to True, the document classifier training job will throw an error when the
+        status is TRAINED_WITH_WARNING. (default False)
+
+    :param deferrable: If True, the sensor will operate in deferrable mode. This mode requires aiobotocore
+        module to be installed.
+        (default: False, but can be overridden in config file by setting default_deferrable to True)
+    :param poke_interval: Polling period in seconds to check for the status of the job. (default: 120)
+    :param max_retries: Number of times before returning the current state. (default: 75)
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
+    """
+
+    aws_hook_class = ComprehendHook
+
+    INTERMEDIATE_STATES: tuple[str, ...] = (
+        "SUBMITTED",
+        "TRAINING",
+    )
+    FAILURE_STATES: tuple[str, ...] = (
+        "DELETING",
+        "STOP_REQUESTED",
+        "STOPPED",
+        "IN_ERROR",
+    )
+    SUCCESS_STATES: tuple[str, ...] = ("TRAINED", "TRAINED_WITH_WARNING")
+    FAILURE_MESSAGE = "Comprehend document classifier failed."
+
+    template_fields: Sequence[str] = aws_template_fields("document_classifier_arn")
+
+    def __init__(
+        self,
+        *,
+        document_classifier_arn: str,
+        fail_on_warnings: bool = False,
+        max_retries: int = 75,
+        poke_interval: int = 120,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        aws_conn_id: str | None = "aws_default",
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(**kwargs)
+        self.document_classifier_arn = document_classifier_arn
+        self.fail_on_warnings = fail_on_warnings
+        self.max_retries = max_retries
+        self.poke_interval = poke_interval
+        self.deferrable = deferrable
+        self.aws_conn_id = aws_conn_id
+
+    def execute(self, context: Context) -> Any:
+        if self.deferrable:
+            self.defer(
+                trigger=ComprehendCreateDocumentClassifierCompletedTrigger(
+                    document_classifier_arn=self.document_classifier_arn,
+                    waiter_delay=int(self.poke_interval),
+                    waiter_max_attempts=self.max_retries,
+                    aws_conn_id=self.aws_conn_id,
+                ),
+                method_name="poke",
+            )
+        else:
+            super().execute(context=context)
+
+    def poke(self, context: Context, **kwargs) -> bool:
+        status = self.hook.conn.describe_document_classifier(
+            DocumentClassifierArn=self.document_classifier_arn
+        )["DocumentClassifierProperties"]["Status"]
+
+        self.log.info(
+            "Poking for AWS Comprehend document classifier arn: %s status: %s",
+            self.document_classifier_arn,
+            status,
+        )
+
+        if status in self.FAILURE_STATES:
+            # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
+            if self.soft_fail:
+                raise AirflowSkipException(self.FAILURE_MESSAGE)
+            raise AirflowException(self.FAILURE_MESSAGE)
+
+        if status in self.SUCCESS_STATES:
+            self.hook.validate_document_classifier_training_status(
+                document_classifier_arn=self.document_classifier_arn, fail_on_warnings=self.fail_on_warnings
+            )
+
+            self.log.info("Comprehend document classifier `%s` complete.", self.document_classifier_arn)
+
+            return True
+
+        return False
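For orientation, the new sensor can be dropped into a DAG like any other AWS sensor. The following is a minimal sketch based only on the signature shown above; the DAG id, task id, and classifier ARN are placeholders, not values from the release.

from __future__ import annotations

from datetime import datetime

from airflow import DAG
from airflow.providers.amazon.aws.sensors.comprehend import (
    ComprehendCreateDocumentClassifierCompletedSensor,
)

with DAG(
    dag_id="example_comprehend_classifier",  # placeholder DAG id
    start_date=datetime(2024, 1, 1),
    schedule=None,
    catchup=False,
):
    # Waits until the classifier reaches TRAINED or TRAINED_WITH_WARNING;
    # with deferrable=True the wait happens in the triggerer, not a worker slot.
    wait_for_classifier = ComprehendCreateDocumentClassifierCompletedSensor(
        task_id="wait_for_classifier",
        document_classifier_arn="arn:aws:comprehend:us-east-1:123456789012:document-classifier/example",  # placeholder
        fail_on_warnings=False,
        deferrable=True,
        poke_interval=120,
        max_retries=75,
    )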
airflow/providers/amazon/aws/sensors/glue.py (+260 -2)

@@ -18,10 +18,18 @@
 from __future__ import annotations

 from functools import cached_property
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, Any, Sequence

+from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowSkipException
-from airflow.providers.amazon.aws.hooks.glue import GlueJobHook
+from airflow.providers.amazon.aws.hooks.glue import GlueDataQualityHook, GlueJobHook
+from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
+from airflow.providers.amazon.aws.triggers.glue import (
+    GlueDataQualityRuleRecommendationRunCompleteTrigger,
+    GlueDataQualityRuleSetEvaluationRunCompleteTrigger,
+)
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
+from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
 from airflow.sensors.base import BaseSensorOperator

 if TYPE_CHECKING:
@@ -91,3 +99,253 @@ class GlueJobSensor(BaseSensorOperator)
                     run_id=self.run_id,
                     continuation_tokens=self.next_log_tokens,
                 )
+
+
+class GlueDataQualityRuleSetEvaluationRunSensor(AwsBaseSensor[GlueDataQualityHook]):
+    """
+    Waits for an AWS Glue data quality ruleset evaluation run to reach any of the status below.
+
+    'FAILED', 'STOPPED', 'STOPPING', 'TIMEOUT', 'SUCCEEDED'
+
+    .. seealso::
+        For more information on how to use this sensor, take a look at the guide:
+        :ref:`howto/sensor:GlueDataQualityRuleSetEvaluationRunSensor`
+
+    :param evaluation_run_id: The AWS Glue data quality ruleset evaluation run identifier.
+    :param verify_result_status: Validate all the ruleset rules evaluation run results,
+        If any of the rule status is Fail or Error then an exception is thrown. (default: True)
+    :param show_results: Displays all the ruleset rules evaluation run results. (default: True)
+    :param deferrable: If True, the sensor will operate in deferrable mode. This mode requires aiobotocore
+        module to be installed.
+        (default: False, but can be overridden in config file by setting default_deferrable to True)
+    :param poke_interval: Polling period in seconds to check for the status of the job. (default: 120)
+    :param max_retries: Number of times before returning the current state. (default: 60)
+
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
+    """
+
+    SUCCESS_STATES = ("SUCCEEDED",)
+
+    FAILURE_STATES = ("FAILED", "STOPPED", "STOPPING", "TIMEOUT")
+
+    aws_hook_class = GlueDataQualityHook
+    template_fields: Sequence[str] = aws_template_fields("evaluation_run_id")
+
+    def __init__(
+        self,
+        *,
+        evaluation_run_id: str,
+        show_results: bool = True,
+        verify_result_status: bool = True,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        poke_interval: int = 120,
+        max_retries: int = 60,
+        aws_conn_id: str | None = "aws_default",
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.evaluation_run_id = evaluation_run_id
+        self.show_results = show_results
+        self.verify_result_status = verify_result_status
+        self.aws_conn_id = aws_conn_id
+        self.max_retries = max_retries
+        self.poke_interval = poke_interval
+        self.deferrable = deferrable
+
+    def execute(self, context: Context) -> Any:
+        if self.deferrable:
+            self.defer(
+                trigger=GlueDataQualityRuleSetEvaluationRunCompleteTrigger(
+                    evaluation_run_id=self.evaluation_run_id,
+                    waiter_delay=int(self.poke_interval),
+                    waiter_max_attempts=self.max_retries,
+                    aws_conn_id=self.aws_conn_id,
+                ),
+                method_name="execute_complete",
+            )
+        else:
+            super().execute(context=context)
+
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
+        if event["status"] != "success":
+            message = f"Error: AWS Glue data quality ruleset evaluation run: {event}"
+            if self.soft_fail:
+                raise AirflowSkipException(message)
+            raise AirflowException(message)
+
+        self.hook.validate_evaluation_run_results(
+            evaluation_run_id=event["evaluation_run_id"],
+            show_results=self.show_results,
+            verify_result_status=self.verify_result_status,
+        )
+
+        self.log.info("AWS Glue data quality ruleset evaluation run completed.")
+
+    def poke(self, context: Context):
+        self.log.info(
+            "Poking for AWS Glue data quality ruleset evaluation run RunId: %s", self.evaluation_run_id
+        )
+
+        response = self.hook.conn.get_data_quality_ruleset_evaluation_run(RunId=self.evaluation_run_id)
+
+        status = response.get("Status")
+
+        if status in self.SUCCESS_STATES:
+            self.hook.validate_evaluation_run_results(
+                evaluation_run_id=self.evaluation_run_id,
+                show_results=self.show_results,
+                verify_result_status=self.verify_result_status,
+            )
+
+            self.log.info(
+                "AWS Glue data quality ruleset evaluation run completed RunId: %s Run State: %s",
+                self.evaluation_run_id,
+                response["Status"],
+            )
+
+            return True
+
+        elif status in self.FAILURE_STATES:
+            job_error_message = (
+                f"Error: AWS Glue data quality ruleset evaluation run RunId: {self.evaluation_run_id} Run "
+                f"Status: {status}"
+                f": {response.get('ErrorString')}"
+            )
+            self.log.info(job_error_message)
+            # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
+            if self.soft_fail:
+                raise AirflowSkipException(job_error_message)
+            raise AirflowException(job_error_message)
+        else:
+            return False
+
+
+class GlueDataQualityRuleRecommendationRunSensor(AwsBaseSensor[GlueDataQualityHook]):
+    """
+    Waits for an AWS Glue data quality recommendation run to reach any of the status below.
+
+    'FAILED', 'STOPPED', 'STOPPING', 'TIMEOUT', 'SUCCEEDED'
+
+    .. seealso::
+        For more information on how to use this sensor, take a look at the guide:
+        :ref:`howto/sensor:GlueDataQualityRuleRecommendationRunSensor`
+
+    :param recommendation_run_id: The AWS Glue data quality rule recommendation run identifier.
+    :param show_results: Displays the recommended ruleset (a set of rules), when recommendation run completes. (default: True)
+    :param deferrable: If True, the sensor will operate in deferrable mode. This mode requires aiobotocore
+        module to be installed.
+        (default: False, but can be overridden in config file by setting default_deferrable to True)
+    :param poke_interval: Polling period in seconds to check for the status of the job. (default: 120)
+    :param max_retries: Number of times before returning the current state. (default: 60)
+
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
+    """
+
+    SUCCESS_STATES = ("SUCCEEDED",)
+
+    FAILURE_STATES = ("FAILED", "STOPPED", "STOPPING", "TIMEOUT")
+
+    aws_hook_class = GlueDataQualityHook
+    template_fields: Sequence[str] = aws_template_fields("recommendation_run_id")
+
+    def __init__(
+        self,
+        *,
+        recommendation_run_id: str,
+        show_results: bool = True,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        poke_interval: int = 120,
+        max_retries: int = 60,
+        aws_conn_id: str | None = "aws_default",
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.recommendation_run_id = recommendation_run_id
+        self.show_results = show_results
+        self.deferrable = deferrable
+        self.poke_interval = poke_interval
+        self.max_retries = max_retries
+        self.aws_conn_id = aws_conn_id
+
+    def execute(self, context: Context) -> Any:
+        if self.deferrable:
+            self.defer(
+                trigger=GlueDataQualityRuleRecommendationRunCompleteTrigger(
+                    recommendation_run_id=self.recommendation_run_id,
+                    waiter_delay=int(self.poke_interval),
+                    waiter_max_attempts=self.max_retries,
+                    aws_conn_id=self.aws_conn_id,
+                ),
+                method_name="execute_complete",
+            )
+        else:
+            super().execute(context=context)
+
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
+        if event["status"] != "success":
+            message = f"Error: AWS Glue data quality recommendation run: {event}"
+            if self.soft_fail:
+                raise AirflowSkipException(message)
+            raise AirflowException(message)
+
+        if self.show_results:
+            self.hook.log_recommendation_results(run_id=self.recommendation_run_id)
+
+        self.log.info("AWS Glue data quality recommendation run completed.")
+
+    def poke(self, context: Context) -> bool:
+        self.log.info(
+            "Poking for AWS Glue data quality recommendation run RunId: %s", self.recommendation_run_id
+        )
+
+        response = self.hook.conn.get_data_quality_rule_recommendation_run(RunId=self.recommendation_run_id)
+
+        status = response.get("Status")
+
+        if status in self.SUCCESS_STATES:
+            if self.show_results:
+                self.hook.log_recommendation_results(run_id=self.recommendation_run_id)
+
+            self.log.info(
+                "AWS Glue data quality recommendation run completed RunId: %s Run State: %s",
+                self.recommendation_run_id,
+                response["Status"],
+            )
+
+            return True
+
+        elif status in self.FAILURE_STATES:
+            job_error_message = (
+                f"Error: AWS Glue data quality recommendation run RunId: {self.recommendation_run_id} Run "
+                f"Status: {status}"
+                f": {response.get('ErrorString')}"
+            )
+            self.log.info(job_error_message)
+            # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
+            if self.soft_fail:
+                raise AirflowSkipException(job_error_message)
+            raise AirflowException(job_error_message)
+        else:
+            return False
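As a rough usage sketch for the new Glue data quality sensors (placed inside a DAG definition; the upstream task id and XCom wiring are assumptions for illustration, not part of this diff):

from airflow.providers.amazon.aws.sensors.glue import GlueDataQualityRuleSetEvaluationRunSensor

# evaluation_run_id is templated (see template_fields above), so it can be
# pulled from an upstream task that started the ruleset evaluation run.
wait_for_evaluation_run = GlueDataQualityRuleSetEvaluationRunSensor(
    task_id="wait_for_evaluation_run",
    evaluation_run_id="{{ task_instance.xcom_pull(task_ids='start_evaluation_run') }}",  # placeholder upstream task
    show_results=True,          # log each rule's result once the run succeeds
    verify_result_status=True,  # raise if any rule ends in Fail or Error
    deferrable=True,            # wait in the triggerer via the new trigger
    poke_interval=120,
    max_retries=60,
)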
airflow/providers/amazon/aws/sensors/s3.py (+35 -5)

@@ -78,6 +78,11 @@ class S3KeySensor(BaseSensorOperator):
         CA cert bundle than the one used by botocore.
     :param deferrable: Run operator in the deferrable mode
     :param use_regex: whether to use regex to check bucket
+    :param metadata_keys: List of head_object attributes to gather and send to ``check_fn``.
+        Acceptable values: Any top level attribute returned by s3.head_object. Specify * to return
+        all available attributes.
+        Default value: "Size".
+        If the requested attribute is not found, the key is still included and the value is None.
     """

     template_fields: Sequence[str] = ("bucket_key", "bucket_name")
@@ -93,6 +98,7 @@ class S3KeySensor(BaseSensorOperator):
         verify: str | bool | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         use_regex: bool = False,
+        metadata_keys: list[str] | None = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -104,14 +110,14 @@ class S3KeySensor(BaseSensorOperator):
         self.verify = verify
         self.deferrable = deferrable
         self.use_regex = use_regex
+        self.metadata_keys = metadata_keys if metadata_keys else ["Size"]

     def _check_key(self, key):
         bucket_name, key = S3Hook.get_s3_bucket_key(self.bucket_name, key, "bucket_name", "bucket_key")
         self.log.info("Poking for key : s3://%s/%s", bucket_name, key)

         """
-        Set variable `files` which contains a list of dict which contains
-        If needed we might want to add other attributes later
+        Set variable `files` which contains a list of dict which contains attributes defined by the user
         Format: [{
             'Size': int
         }]
@@ -123,8 +129,21 @@ class S3KeySensor(BaseSensorOperator):
             if not key_matches:
                 return False

-            # Reduce the set of metadata to
-            files = [
+            # Reduce the set of metadata to requested attributes
+            files = []
+            for f in key_matches:
+                metadata = {}
+                if "*" in self.metadata_keys:
+                    metadata = self.hook.head_object(f["Key"], bucket_name)
+                else:
+                    for key in self.metadata_keys:
+                        try:
+                            metadata[key] = f[key]
+                        except KeyError:
+                            # supplied key might be from head_object response
+                            self.log.info("Key %s not found in response, performing head_object", key)
+                            metadata[key] = self.hook.head_object(f["Key"], bucket_name).get(key, None)
+                files.append(metadata)
         elif self.use_regex:
             keys = self.hook.get_file_metadata("", bucket_name)
             key_matches = [k for k in keys if re.match(pattern=key, string=k["Key"])]
@@ -134,7 +153,18 @@ class S3KeySensor(BaseSensorOperator):
             obj = self.hook.head_object(key, bucket_name)
             if obj is None:
                 return False
-            files = [{"Size": obj["ContentLength"]}]
+            metadata = {}
+            if "*" in self.metadata_keys:
+                metadata = self.hook.head_object(key, bucket_name)
+
+            else:
+                for key in self.metadata_keys:
+                    # backwards compatibility with original implementation
+                    if key == "Size":
+                        metadata[key] = obj.get("ContentLength")
+                    else:
+                        metadata[key] = obj.get(key, None)
+            files = [metadata]

         if self.check_fn is not None:
             return self.check_fn(files)
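A hedged sketch of what the new metadata_keys parameter enables: check_fn now receives whichever head_object attributes were requested, instead of only Size. The bucket, key pattern, and attribute choices below are illustrative, not values from the release.

from airflow.providers.amazon.aws.sensors.s3 import S3KeySensor


def _all_files_ready(files: list[dict]) -> bool:
    # Each dict carries the requested metadata keys; an attribute that could
    # not be resolved is still present with a value of None.
    return all(f.get("Size") and f.get("ContentType") is not None for f in files)


wait_for_exports = S3KeySensor(
    task_id="wait_for_exports",
    bucket_name="example-bucket",           # placeholder
    bucket_key="exports/*.csv",             # placeholder
    wildcard_match=True,
    metadata_keys=["Size", "ContentType"],  # ContentType is fetched via head_object
    check_fn=_all_files_ready,
)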
airflow/providers/amazon/aws/transfers/redshift_to_s3.py (+1 -1)

@@ -128,7 +128,7 @@ class RedshiftToS3Operator(BaseOperator):
         self, credentials_block: str, select_query: str, s3_key: str, unload_options: str
     ) -> str:
         # Un-escape already escaped queries
-        select_query = re.sub(r"''(
+        select_query = re.sub(r"''(.+?)''", r"'\1'", select_query)
         return f"""
                     UNLOAD ($${select_query}$$)
                     TO 's3://{self.s3_bucket}/{s3_key}'
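For reference, the corrected un-escape step behaves as in this standalone sketch (the sample query is illustrative):

import re

query = "SELECT * FROM events WHERE status = ''active''"
# Doubled single quotes produced by earlier escaping are collapsed back to
# single quotes before the query is embedded in UNLOAD ($$ ... $$).
print(re.sub(r"''(.+?)''", r"'\1'", query))
# -> SELECT * FROM events WHERE status = 'active'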
airflow/providers/amazon/aws/triggers/comprehend.py (+36)

@@ -59,3 +59,39 @@ class ComprehendPiiEntitiesDetectionJobCompletedTrigger(AwsBaseWaiterTrigger):

     def hook(self) -> AwsGenericHook:
         return ComprehendHook(aws_conn_id=self.aws_conn_id)
+
+
+class ComprehendCreateDocumentClassifierCompletedTrigger(AwsBaseWaiterTrigger):
+    """
+    Trigger when a Comprehend document classifier is complete.
+
+    :param document_classifier_arn: The arn of the Comprehend document classifier.
+    :param waiter_delay: The amount of time in seconds to wait between attempts. (default: 120)
+    :param waiter_max_attempts: The maximum number of attempts to be made. (default: 75)
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+    """
+
+    def __init__(
+        self,
+        *,
+        document_classifier_arn: str,
+        waiter_delay: int = 120,
+        waiter_max_attempts: int = 75,
+        aws_conn_id: str | None = "aws_default",
+    ) -> None:
+        super().__init__(
+            serialized_fields={"document_classifier_arn": document_classifier_arn},
+            waiter_name="create_document_classifier_complete",
+            waiter_args={"DocumentClassifierArn": document_classifier_arn},
+            failure_message="Comprehend create document classifier failed.",
+            status_message="Status of Comprehend create document classifier is",
+            status_queries=["DocumentClassifierProperties.Status"],
+            return_key="document_classifier_arn",
+            return_value=document_classifier_arn,
+            waiter_delay=waiter_delay,
+            waiter_max_attempts=waiter_max_attempts,
+            aws_conn_id=aws_conn_id,
+        )
+
+    def hook(self) -> AwsGenericHook:
+        return ComprehendHook(aws_conn_id=self.aws_conn_id)
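Beyond the bundled sensor, the new trigger could also back a custom deferrable operator. The operator below is purely illustrative (its name and error handling are not part of the provider); it relies on the return_key/return_value pair shown above to read the ARN back from the trigger event.

from __future__ import annotations

from typing import Any

from airflow.models import BaseOperator
from airflow.providers.amazon.aws.triggers.comprehend import (
    ComprehendCreateDocumentClassifierCompletedTrigger,
)
from airflow.utils.context import Context


class WaitForClassifierOperator(BaseOperator):
    """Illustrative operator that defers until a Comprehend classifier is trained."""

    def __init__(self, *, document_classifier_arn: str, **kwargs):
        super().__init__(**kwargs)
        self.document_classifier_arn = document_classifier_arn

    def execute(self, context: Context):
        # Hand the wait over to the triggerer instead of blocking a worker.
        self.defer(
            trigger=ComprehendCreateDocumentClassifierCompletedTrigger(
                document_classifier_arn=self.document_classifier_arn,
                waiter_delay=120,
                waiter_max_attempts=75,
            ),
            method_name="execute_complete",
        )

    def execute_complete(self, context: Context, event: dict[str, Any] | None = None):
        if event and event.get("status") == "success":
            return event["document_classifier_arn"]
        raise RuntimeError(f"Comprehend document classifier did not complete: {event}")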
airflow/providers/amazon/aws/triggers/glue.py (+76 -2)

@@ -19,10 +19,14 @@ from __future__ import annotations

 import asyncio
 from functools import cached_property
-from typing import Any, AsyncIterator
+from typing import TYPE_CHECKING, Any, AsyncIterator

-from airflow.providers.amazon.aws.hooks.glue import GlueJobHook
+if TYPE_CHECKING:
+    from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
+
+from airflow.providers.amazon.aws.hooks.glue import GlueDataQualityHook, GlueJobHook
 from airflow.providers.amazon.aws.hooks.glue_catalog import GlueCatalogHook
+from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
 from airflow.triggers.base import BaseTrigger, TriggerEvent


@@ -148,3 +152,73 @@ class GlueCatalogPartitionTrigger(BaseTrigger):
                     break
                 else:
                     await asyncio.sleep(self.waiter_delay)
+
+
+class GlueDataQualityRuleSetEvaluationRunCompleteTrigger(AwsBaseWaiterTrigger):
+    """
+    Trigger when a AWS Glue data quality evaluation run complete.
+
+    :param evaluation_run_id: The AWS Glue data quality ruleset evaluation run identifier.
+    :param waiter_delay: The amount of time in seconds to wait between attempts. (default: 60)
+    :param waiter_max_attempts: The maximum number of attempts to be made. (default: 75)
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+    """
+
+    def __init__(
+        self,
+        evaluation_run_id: str,
+        waiter_delay: int = 60,
+        waiter_max_attempts: int = 75,
+        aws_conn_id: str | None = "aws_default",
+    ):
+        super().__init__(
+            serialized_fields={"evaluation_run_id": evaluation_run_id},
+            waiter_name="data_quality_ruleset_evaluation_run_complete",
+            waiter_args={"RunId": evaluation_run_id},
+            failure_message="AWS Glue data quality ruleset evaluation run failed.",
+            status_message="Status of AWS Glue data quality ruleset evaluation run is",
+            status_queries=["Status"],
+            return_key="evaluation_run_id",
+            return_value=evaluation_run_id,
+            waiter_delay=waiter_delay,
+            waiter_max_attempts=waiter_max_attempts,
+            aws_conn_id=aws_conn_id,
+        )
+
+    def hook(self) -> AwsGenericHook:
+        return GlueDataQualityHook(aws_conn_id=self.aws_conn_id)
+
+
+class GlueDataQualityRuleRecommendationRunCompleteTrigger(AwsBaseWaiterTrigger):
+    """
+    Trigger when a AWS Glue data quality recommendation run complete.
+
+    :param recommendation_run_id: The AWS Glue data quality rule recommendation run identifier.
+    :param waiter_delay: The amount of time in seconds to wait between attempts. (default: 60)
+    :param waiter_max_attempts: The maximum number of attempts to be made. (default: 75)
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+    """
+
+    def __init__(
+        self,
+        recommendation_run_id: str,
+        waiter_delay: int = 60,
+        waiter_max_attempts: int = 75,
+        aws_conn_id: str | None = "aws_default",
+    ):
+        super().__init__(
+            serialized_fields={"recommendation_run_id": recommendation_run_id},
+            waiter_name="data_quality_rule_recommendation_run_complete",
+            waiter_args={"RunId": recommendation_run_id},
+            failure_message="AWS Glue data quality recommendation run failed.",
+            status_message="Status of AWS Glue data quality recommendation run is",
+            status_queries=["Status"],
+            return_key="recommendation_run_id",
+            return_value=recommendation_run_id,
+            waiter_delay=waiter_delay,
+            waiter_max_attempts=waiter_max_attempts,
+            aws_conn_id=aws_conn_id,
+        )
+
+    def hook(self) -> AwsGenericHook:
+        return GlueDataQualityHook(aws_conn_id=self.aws_conn_id)
airflow/providers/amazon/aws/utils/__init__.py (+2 -3)

@@ -20,10 +20,9 @@ import logging
 import re
 from datetime import datetime, timezone
 from enum import Enum
+from importlib import metadata
 from typing import Any

-import importlib_metadata
-
 from airflow.exceptions import AirflowException
 from airflow.utils.helpers import prune_dict
 from airflow.version import version
@@ -78,7 +77,7 @@ def get_airflow_version() -> tuple[int, ...]:

 def get_botocore_version() -> tuple[int, ...]:
     """Return the version number of the installed botocore package in the form of a tuple[int,...]."""
-    return tuple(map(int,
+    return tuple(map(int, metadata.version("botocore").split(".")[:3]))


 def validate_execute_complete_event(event: dict[str, Any] | None = None) -> dict[str, Any]: