PyPI - apache-airflow-providers-amazon - Versions diffs - 8.24.0__py3-none-any.whl → 8.24.0rc1__py3-none-any.whl - Mend

apache-airflow-providers-amazon 8.24.0__py3-none-any.whl → 8.24.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

airflow/providers/amazon/aws/sensors/glue.py CHANGED Viewed

@@ -18,18 +18,10 @@
 from __future__ import annotations
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, Sequence
+from typing import TYPE_CHECKING, Sequence
-from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowSkipException
-from airflow.providers.amazon.aws.hooks.glue import GlueDataQualityHook, GlueJobHook
-from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
-from airflow.providers.amazon.aws.triggers.glue import (
-    GlueDataQualityRuleRecommendationRunCompleteTrigger,
-    GlueDataQualityRuleSetEvaluationRunCompleteTrigger,
-)
-from airflow.providers.amazon.aws.utils import validate_execute_complete_event
-from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
+from airflow.providers.amazon.aws.hooks.glue import GlueJobHook
 from airflow.sensors.base import BaseSensorOperator
 if TYPE_CHECKING:
@@ -99,253 +91,3 @@ class GlueJobSensor(BaseSensorOperator):
                     run_id=self.run_id,
                     continuation_tokens=self.next_log_tokens,
                 )
-class GlueDataQualityRuleSetEvaluationRunSensor(AwsBaseSensor[GlueDataQualityHook]):
-    """
-    Waits for an AWS Glue data quality ruleset evaluation run to reach any of the status below.
-    'FAILED', 'STOPPED', 'STOPPING', 'TIMEOUT', 'SUCCEEDED'
-    .. seealso::
-        For more information on how to use this sensor, take a look at the guide:
-        :ref:`howto/sensor:GlueDataQualityRuleSetEvaluationRunSensor`
-    :param evaluation_run_id: The AWS Glue data quality ruleset evaluation run identifier.
-    :param verify_result_status: Validate all the ruleset rules evaluation run results,
-        If any of the rule status is Fail or Error then an exception is thrown. (default: True)
-    :param show_results: Displays all the ruleset rules evaluation run results. (default: True)
-    :param deferrable: If True, the sensor will operate in deferrable mode. This mode requires aiobotocore
-        module to be installed.
-        (default: False, but can be overridden in config file by setting default_deferrable to True)
-    :param poke_interval: Polling period in seconds to check for the status of the job. (default: 120)
-    :param max_retries: Number of times before returning the current state. (default: 60)
-    :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is ``None`` or empty then the default boto3 behaviour is used. If
-        running Airflow in a distributed manner and aws_conn_id is None or
-        empty, then default boto3 configuration would be used (and must be
-        maintained on each worker node).
-    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
-    :param verify: Whether to verify SSL certificates. See:
-        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
-    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
-        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
-    """
-    SUCCESS_STATES = ("SUCCEEDED",)
-    FAILURE_STATES = ("FAILED", "STOPPED", "STOPPING", "TIMEOUT")
-    aws_hook_class = GlueDataQualityHook
-    template_fields: Sequence[str] = aws_template_fields("evaluation_run_id")
-    def __init__(
-        self,
-        *,
-        evaluation_run_id: str,
-        show_results: bool = True,
-        verify_result_status: bool = True,
-        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
-        poke_interval: int = 120,
-        max_retries: int = 60,
-        aws_conn_id: str | None = "aws_default",
-        **kwargs,
-    ):
-        super().__init__(**kwargs)
-        self.evaluation_run_id = evaluation_run_id
-        self.show_results = show_results
-        self.verify_result_status = verify_result_status
-        self.aws_conn_id = aws_conn_id
-        self.max_retries = max_retries
-        self.poke_interval = poke_interval
-        self.deferrable = deferrable
-    def execute(self, context: Context) -> Any:
-        if self.deferrable:
-            self.defer(
-                trigger=GlueDataQualityRuleSetEvaluationRunCompleteTrigger(
-                    evaluation_run_id=self.evaluation_run_id,
-                    waiter_delay=int(self.poke_interval),
-                    waiter_max_attempts=self.max_retries,
-                    aws_conn_id=self.aws_conn_id,
-                ),
-                method_name="execute_complete",
-            )
-        else:
-            super().execute(context=context)
-    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
-        event = validate_execute_complete_event(event)
-        if event["status"] != "success":
-            message = f"Error: AWS Glue data quality ruleset evaluation run: {event}"
-            if self.soft_fail:
-                raise AirflowSkipException(message)
-            raise AirflowException(message)
-        self.hook.validate_evaluation_run_results(
-            evaluation_run_id=event["evaluation_run_id"],
-            show_results=self.show_results,
-            verify_result_status=self.verify_result_status,
-        )
-        self.log.info("AWS Glue data quality ruleset evaluation run completed.")
-    def poke(self, context: Context):
-        self.log.info(
-            "Poking for AWS Glue data quality ruleset evaluation run RunId: %s", self.evaluation_run_id
-        )
-        response = self.hook.conn.get_data_quality_ruleset_evaluation_run(RunId=self.evaluation_run_id)
-        status = response.get("Status")
-        if status in self.SUCCESS_STATES:
-            self.hook.validate_evaluation_run_results(
-                evaluation_run_id=self.evaluation_run_id,
-                show_results=self.show_results,
-                verify_result_status=self.verify_result_status,
-            )
-            self.log.info(
-                "AWS Glue data quality ruleset evaluation run completed RunId: %s Run State: %s",
-                self.evaluation_run_id,
-                response["Status"],
-            )
-            return True
-        elif status in self.FAILURE_STATES:
-            job_error_message = (
-                f"Error: AWS Glue data quality ruleset evaluation run RunId: {self.evaluation_run_id} Run "
-                f"Status: {status}"
-                f": {response.get('ErrorString')}"
-            )
-            self.log.info(job_error_message)
-            # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
-            if self.soft_fail:
-                raise AirflowSkipException(job_error_message)
-            raise AirflowException(job_error_message)
-        else:
-            return False
-class GlueDataQualityRuleRecommendationRunSensor(AwsBaseSensor[GlueDataQualityHook]):
-    """
-    Waits for an AWS Glue data quality recommendation run to reach any of the status below.
-    'FAILED', 'STOPPED', 'STOPPING', 'TIMEOUT', 'SUCCEEDED'
-    .. seealso::
-        For more information on how to use this sensor, take a look at the guide:
-        :ref:`howto/sensor:GlueDataQualityRuleRecommendationRunSensor`
-    :param recommendation_run_id: The AWS Glue data quality rule recommendation run identifier.
-    :param show_results: Displays the recommended ruleset (a set of rules), when recommendation run completes. (default: True)
-    :param deferrable: If True, the sensor will operate in deferrable mode. This mode requires aiobotocore
-        module to be installed.
-        (default: False, but can be overridden in config file by setting default_deferrable to True)
-    :param poke_interval: Polling period in seconds to check for the status of the job. (default: 120)
-    :param max_retries: Number of times before returning the current state. (default: 60)
-    :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is ``None`` or empty then the default boto3 behaviour is used. If
-        running Airflow in a distributed manner and aws_conn_id is None or
-        empty, then default boto3 configuration would be used (and must be
-        maintained on each worker node).
-    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
-    :param verify: Whether to verify SSL certificates. See:
-        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
-    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
-        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
-    """
-    SUCCESS_STATES = ("SUCCEEDED",)
-    FAILURE_STATES = ("FAILED", "STOPPED", "STOPPING", "TIMEOUT")
-    aws_hook_class = GlueDataQualityHook
-    template_fields: Sequence[str] = aws_template_fields("recommendation_run_id")
-    def __init__(
-        self,
-        *,
-        recommendation_run_id: str,
-        show_results: bool = True,
-        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
-        poke_interval: int = 120,
-        max_retries: int = 60,
-        aws_conn_id: str | None = "aws_default",
-        **kwargs,
-    ):
-        super().__init__(**kwargs)
-        self.recommendation_run_id = recommendation_run_id
-        self.show_results = show_results
-        self.deferrable = deferrable
-        self.poke_interval = poke_interval
-        self.max_retries = max_retries
-        self.aws_conn_id = aws_conn_id
-    def execute(self, context: Context) -> Any:
-        if self.deferrable:
-            self.defer(
-                trigger=GlueDataQualityRuleRecommendationRunCompleteTrigger(
-                    recommendation_run_id=self.recommendation_run_id,
-                    waiter_delay=int(self.poke_interval),
-                    waiter_max_attempts=self.max_retries,
-                    aws_conn_id=self.aws_conn_id,
-                ),
-                method_name="execute_complete",
-            )
-        else:
-            super().execute(context=context)
-    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
-        event = validate_execute_complete_event(event)
-        if event["status"] != "success":
-            message = f"Error: AWS Glue data quality recommendation run: {event}"
-            if self.soft_fail:
-                raise AirflowSkipException(message)
-            raise AirflowException(message)
-        if self.show_results:
-            self.hook.log_recommendation_results(run_id=self.recommendation_run_id)
-        self.log.info("AWS Glue data quality recommendation run completed.")
-    def poke(self, context: Context) -> bool:
-        self.log.info(
-            "Poking for AWS Glue data quality recommendation run RunId: %s", self.recommendation_run_id
-        )
-        response = self.hook.conn.get_data_quality_rule_recommendation_run(RunId=self.recommendation_run_id)
-        status = response.get("Status")
-        if status in self.SUCCESS_STATES:
-            if self.show_results:
-                self.hook.log_recommendation_results(run_id=self.recommendation_run_id)
-            self.log.info(
-                "AWS Glue data quality recommendation run completed RunId: %s Run State: %s",
-                self.recommendation_run_id,
-                response["Status"],
-            )
-            return True
-        elif status in self.FAILURE_STATES:
-            job_error_message = (
-                f"Error: AWS Glue data quality recommendation run RunId: {self.recommendation_run_id} Run "
-                f"Status: {status}"
-                f": {response.get('ErrorString')}"
-            )
-            self.log.info(job_error_message)
-            # TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
-            if self.soft_fail:
-                raise AirflowSkipException(job_error_message)
-            raise AirflowException(job_error_message)
-        else:
-            return False

airflow/providers/amazon/aws/sensors/s3.py CHANGED Viewed

@@ -78,11 +78,6 @@ class S3KeySensor(BaseSensorOperator):
                  CA cert bundle than the one used by botocore.
     :param deferrable: Run operator in the deferrable mode
     :param use_regex: whether to use regex to check bucket
-    :param metadata_keys: List of head_object attributes to gather and send to ``check_fn``.
-        Acceptable values: Any top level attribute returned by s3.head_object. Specify * to return
-        all available attributes.
-        Default value: "Size".
-        If the requested attribute is not found, the key is still included and the value is None.
     """
     template_fields: Sequence[str] = ("bucket_key", "bucket_name")
@@ -98,7 +93,6 @@ class S3KeySensor(BaseSensorOperator):
         verify: str | bool | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         use_regex: bool = False,
-        metadata_keys: list[str] | None = None,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -110,14 +104,14 @@ class S3KeySensor(BaseSensorOperator):
         self.verify = verify
         self.deferrable = deferrable
         self.use_regex = use_regex
-        self.metadata_keys = metadata_keys if metadata_keys else ["Size"]
     def _check_key(self, key):
         bucket_name, key = S3Hook.get_s3_bucket_key(self.bucket_name, key, "bucket_name", "bucket_key")
         self.log.info("Poking for key : s3://%s/%s", bucket_name, key)
         """
-        Set variable `files` which contains a list of dict which contains attributes defined by the user
+        Set variable `files` which contains a list of dict which contains only the size
+        If needed we might want to add other attributes later
         Format: [{
             'Size': int
         }]
@@ -129,21 +123,8 @@ class S3KeySensor(BaseSensorOperator):
             if not key_matches:
                 return False
-            # Reduce the set of metadata to requested attributes
-            files = []
-            for f in key_matches:
-                metadata = {}
-                if "*" in self.metadata_keys:
-                    metadata = self.hook.head_object(f["Key"], bucket_name)
-                else:
-                    for key in self.metadata_keys:
-                        try:
-                            metadata[key] = f[key]
-                        except KeyError:
-                            # supplied key might be from head_object response
-                            self.log.info("Key %s not found in response, performing head_object", key)
-                            metadata[key] = self.hook.head_object(f["Key"], bucket_name).get(key, None)
-                files.append(metadata)
+            # Reduce the set of metadata to size only
+            files = [{"Size": f["Size"]} for f in key_matches]
         elif self.use_regex:
             keys = self.hook.get_file_metadata("", bucket_name)
             key_matches = [k for k in keys if re.match(pattern=key, string=k["Key"])]
@@ -153,18 +134,7 @@ class S3KeySensor(BaseSensorOperator):
             obj = self.hook.head_object(key, bucket_name)
             if obj is None:
                 return False
-            metadata = {}
-            if "*" in self.metadata_keys:
-                metadata = self.hook.head_object(key, bucket_name)
-            else:
-                for key in self.metadata_keys:
-                    # backwards compatibility with original implementation
-                    if key == "Size":
-                        metadata[key] = obj.get("ContentLength")
-                    else:
-                        metadata[key] = obj.get(key, None)
-            files = [metadata]
+            files = [{"Size": obj["ContentLength"]}]
         if self.check_fn is not None:
             return self.check_fn(files)

airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py CHANGED Viewed

@@ -105,6 +105,7 @@ class DynamoDBToS3Operator(AwsToAwsBaseOperator):
         "file_size",
         "dynamodb_scan_kwargs",
         "s3_key_prefix",
+        "process_func",
         "export_time",
         "export_format",
         "check_interval",

airflow/providers/amazon/aws/triggers/glue.py CHANGED Viewed

@@ -19,14 +19,10 @@ from __future__ import annotations
 import asyncio
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, AsyncIterator
+from typing import Any, AsyncIterator
-if TYPE_CHECKING:
-    from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
-from airflow.providers.amazon.aws.hooks.glue import GlueDataQualityHook, GlueJobHook
+from airflow.providers.amazon.aws.hooks.glue import GlueJobHook
 from airflow.providers.amazon.aws.hooks.glue_catalog import GlueCatalogHook
-from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
 from airflow.triggers.base import BaseTrigger, TriggerEvent
@@ -152,73 +148,3 @@ class GlueCatalogPartitionTrigger(BaseTrigger):
                     break
                 else:
                     await asyncio.sleep(self.waiter_delay)
-class GlueDataQualityRuleSetEvaluationRunCompleteTrigger(AwsBaseWaiterTrigger):
-    """
-    Trigger when a AWS Glue data quality evaluation run complete.
-    :param evaluation_run_id: The AWS Glue data quality ruleset evaluation run identifier.
-    :param waiter_delay: The amount of time in seconds to wait between attempts. (default: 60)
-    :param waiter_max_attempts: The maximum number of attempts to be made. (default: 75)
-    :param aws_conn_id: The Airflow connection used for AWS credentials.
-    """
-    def __init__(
-        self,
-        evaluation_run_id: str,
-        waiter_delay: int = 60,
-        waiter_max_attempts: int = 75,
-        aws_conn_id: str | None = "aws_default",
-    ):
-        super().__init__(
-            serialized_fields={"evaluation_run_id": evaluation_run_id},
-            waiter_name="data_quality_ruleset_evaluation_run_complete",
-            waiter_args={"RunId": evaluation_run_id},
-            failure_message="AWS Glue data quality ruleset evaluation run failed.",
-            status_message="Status of AWS Glue data quality ruleset evaluation run is",
-            status_queries=["Status"],
-            return_key="evaluation_run_id",
-            return_value=evaluation_run_id,
-            waiter_delay=waiter_delay,
-            waiter_max_attempts=waiter_max_attempts,
-            aws_conn_id=aws_conn_id,
-        )
-    def hook(self) -> AwsGenericHook:
-        return GlueDataQualityHook(aws_conn_id=self.aws_conn_id)
-class GlueDataQualityRuleRecommendationRunCompleteTrigger(AwsBaseWaiterTrigger):
-    """
-    Trigger when a AWS Glue data quality recommendation run complete.
-    :param recommendation_run_id: The AWS Glue data quality rule recommendation run identifier.
-    :param waiter_delay: The amount of time in seconds to wait between attempts. (default: 60)
-    :param waiter_max_attempts: The maximum number of attempts to be made. (default: 75)
-    :param aws_conn_id: The Airflow connection used for AWS credentials.
-    """
-    def __init__(
-        self,
-        recommendation_run_id: str,
-        waiter_delay: int = 60,
-        waiter_max_attempts: int = 75,
-        aws_conn_id: str | None = "aws_default",
-    ):
-        super().__init__(
-            serialized_fields={"recommendation_run_id": recommendation_run_id},
-            waiter_name="data_quality_rule_recommendation_run_complete",
-            waiter_args={"RunId": recommendation_run_id},
-            failure_message="AWS Glue data quality recommendation run failed.",
-            status_message="Status of AWS Glue data quality recommendation run is",
-            status_queries=["Status"],
-            return_key="recommendation_run_id",
-            return_value=recommendation_run_id,
-            waiter_delay=waiter_delay,
-            waiter_max_attempts=waiter_max_attempts,
-            aws_conn_id=aws_conn_id,
-        )
-    def hook(self) -> AwsGenericHook:
-        return GlueDataQualityHook(aws_conn_id=self.aws_conn_id)

airflow/providers/amazon/aws/waiters/glue.json CHANGED Viewed

@@ -25,104 +25,6 @@
                     "state": "success"
                 }
             ]
-        },
-        "data_quality_ruleset_evaluation_run_complete": {
-            "operation": "GetDataQualityRulesetEvaluationRun",
-            "delay": 60,
-            "maxAttempts": 75,
-            "acceptors": [
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "STARTING",
-                    "state": "retry"
-                },
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "RUNNING",
-                    "state": "retry"
-                },
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "STOPPING",
-                    "state": "failure"
-                },
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "STOPPED",
-                    "state": "failure"
-                },
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "FAILED",
-                    "state": "failure"
-                },
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "TIMEOUT",
-                    "state": "failure"
-                },
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "SUCCEEDED",
-                    "state": "success"
-                }
-            ]
-        },
-        "data_quality_rule_recommendation_run_complete": {
-            "operation": "GetDataQualityRuleRecommendationRun",
-            "delay": 60,
-            "maxAttempts": 75,
-            "acceptors": [
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "STARTING",
-                    "state": "retry"
-                },
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "RUNNING",
-                    "state": "retry"
-                },
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "STOPPING",
-                    "state": "failure"
-                },
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "STOPPED",
-                    "state": "failure"
-                },
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "FAILED",
-                    "state": "failure"
-                },
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "TIMEOUT",
-                    "state": "failure"
-                },
-                {
-                    "matcher": "path",
-                    "argument": "Status",
-                    "expected": "SUCCEEDED",
-                    "state": "success"
-                }
-            ]
         }
     }
 }

airflow/providers/amazon/get_provider_info.py CHANGED Viewed

@@ -94,38 +94,32 @@ def get_provider_info():
             "apache-airflow>=2.7.0",
             "apache-airflow-providers-common-sql>=1.3.1",
             "apache-airflow-providers-http",
-            "boto3>=1.34.90",
-            "botocore>=1.34.90",
+            "boto3>=1.33.0",
+            "botocore>=1.33.0",
             "inflection>=0.5.1",
-            "watchtower>=3.0.0,<4",
+            "watchtower>=2.0.1,<4",
             "jsonpath_ng>=1.5.3",
             "redshift_connector>=2.0.918",
             "sqlalchemy_redshift>=0.8.6",
-            "asgiref>=2.3.0",
+            "asgiref",
             "PyAthena>=3.0.10",
-            "jmespath>=0.7.0",
+            "jmespath",
         ],
         "additional-extras": [
-            {
-                "name": "pandas",
-                "dependencies": [
-                    'pandas>=1.5.3,<2.2;python_version<"3.12"',
-                    'pandas>=2.1.1,<2.2;python_version>="3.12"',
-                ],
-            },
-            {"name": "aiobotocore", "dependencies": ["aiobotocore[boto3]>=2.13.0"]},
+            {"name": "pandas", "dependencies": ["pandas>=1.2.5,<2.2"]},
+            {"name": "aiobotocore", "dependencies": ["aiobotocore[boto3]>=2.5.3"]},
             {"name": "cncf.kubernetes", "dependencies": ["apache-airflow-providers-cncf-kubernetes>=7.2.0"]},
             {"name": "s3fs", "dependencies": ["s3fs>=2023.10.0"]},
             {"name": "python3-saml", "dependencies": ["python3-saml>=1.16.0"]},
         ],
         "devel-dependencies": [
-            "aiobotocore>=2.13.0",
+            "aiobotocore>=2.7.0",
             "aws_xray_sdk>=2.12.0",
             "moto[cloudformation,glue]>=5.0.0",
-            "mypy-boto3-appflow>=1.34.0",
-            "mypy-boto3-rds>=1.34.90",
-            "mypy-boto3-redshift-data>=1.34.0",
-            "mypy-boto3-s3>=1.34.90",
+            "mypy-boto3-appflow>=1.33.0",
+            "mypy-boto3-rds>=1.33.0",
+            "mypy-boto3-redshift-data>=1.33.0",
+            "mypy-boto3-s3>=1.33.0",
             "s3fs>=2023.10.0",
             "openapi-schema-validator>=0.6.2",
             "openapi-spec-validator>=0.7.1",

apache-airflow-providers-amazon 8.24.0__py3-none-any.whl → 8.24.0rc1__py3-none-any.whl

apache-airflow-providers-amazon 8.24.0__py3-none-any.whl → 8.24.0rc1__py3-none-any.whl