apache-airflow-providers-amazon 8.25.0rc1__py3-none-any.whl → 8.26.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. airflow/providers/amazon/__init__.py +1 -1
  2. airflow/providers/amazon/aws/auth_manager/aws_auth_manager.py +10 -0
  3. airflow/providers/amazon/aws/executors/batch/batch_executor.py +19 -16
  4. airflow/providers/amazon/aws/executors/ecs/ecs_executor.py +22 -15
  5. airflow/providers/amazon/aws/hooks/athena.py +18 -9
  6. airflow/providers/amazon/aws/hooks/athena_sql.py +2 -1
  7. airflow/providers/amazon/aws/hooks/base_aws.py +34 -10
  8. airflow/providers/amazon/aws/hooks/chime.py +2 -1
  9. airflow/providers/amazon/aws/hooks/datasync.py +6 -3
  10. airflow/providers/amazon/aws/hooks/ecr.py +2 -1
  11. airflow/providers/amazon/aws/hooks/ecs.py +12 -6
  12. airflow/providers/amazon/aws/hooks/glacier.py +8 -4
  13. airflow/providers/amazon/aws/hooks/kinesis.py +2 -1
  14. airflow/providers/amazon/aws/hooks/logs.py +4 -2
  15. airflow/providers/amazon/aws/hooks/redshift_cluster.py +24 -12
  16. airflow/providers/amazon/aws/hooks/redshift_data.py +4 -2
  17. airflow/providers/amazon/aws/hooks/redshift_sql.py +6 -3
  18. airflow/providers/amazon/aws/hooks/s3.py +70 -53
  19. airflow/providers/amazon/aws/hooks/sagemaker.py +82 -41
  20. airflow/providers/amazon/aws/hooks/secrets_manager.py +6 -3
  21. airflow/providers/amazon/aws/hooks/sts.py +2 -1
  22. airflow/providers/amazon/aws/operators/athena.py +21 -8
  23. airflow/providers/amazon/aws/operators/batch.py +12 -6
  24. airflow/providers/amazon/aws/operators/datasync.py +2 -1
  25. airflow/providers/amazon/aws/operators/ecs.py +1 -0
  26. airflow/providers/amazon/aws/operators/emr.py +6 -86
  27. airflow/providers/amazon/aws/operators/glue.py +4 -2
  28. airflow/providers/amazon/aws/operators/glue_crawler.py +22 -19
  29. airflow/providers/amazon/aws/operators/neptune.py +2 -1
  30. airflow/providers/amazon/aws/operators/redshift_cluster.py +2 -1
  31. airflow/providers/amazon/aws/operators/s3.py +11 -1
  32. airflow/providers/amazon/aws/operators/sagemaker.py +8 -10
  33. airflow/providers/amazon/aws/sensors/base_aws.py +2 -1
  34. airflow/providers/amazon/aws/sensors/glue_catalog_partition.py +25 -17
  35. airflow/providers/amazon/aws/sensors/glue_crawler.py +16 -12
  36. airflow/providers/amazon/aws/sensors/s3.py +11 -5
  37. airflow/providers/amazon/aws/transfers/mongo_to_s3.py +6 -3
  38. airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py +2 -1
  39. airflow/providers/amazon/aws/transfers/s3_to_sql.py +2 -1
  40. airflow/providers/amazon/aws/triggers/ecs.py +3 -1
  41. airflow/providers/amazon/aws/triggers/glue.py +15 -3
  42. airflow/providers/amazon/aws/triggers/glue_crawler.py +8 -1
  43. airflow/providers/amazon/aws/utils/connection_wrapper.py +10 -5
  44. airflow/providers/amazon/aws/utils/mixins.py +2 -1
  45. airflow/providers/amazon/aws/utils/redshift.py +2 -1
  46. airflow/providers/amazon/get_provider_info.py +2 -1
  47. {apache_airflow_providers_amazon-8.25.0rc1.dist-info → apache_airflow_providers_amazon-8.26.0.dist-info}/METADATA +9 -9
  48. {apache_airflow_providers_amazon-8.25.0rc1.dist-info → apache_airflow_providers_amazon-8.26.0.dist-info}/RECORD +50 -50
  49. {apache_airflow_providers_amazon-8.25.0rc1.dist-info → apache_airflow_providers_amazon-8.26.0.dist-info}/WHEEL +0 -0
  50. {apache_airflow_providers_amazon-8.25.0rc1.dist-info → apache_airflow_providers_amazon-8.26.0.dist-info}/entry_points.txt +0 -0
airflow/providers/amazon/aws/operators/emr.py

@@ -27,7 +27,6 @@ from uuid import uuid4
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator
-from airflow.models.mappedoperator import MappedOperator
 from airflow.providers.amazon.aws.hooks.emr import EmrContainerHook, EmrHook, EmrServerlessHook
 from airflow.providers.amazon.aws.links.emr import (
     EmrClusterLink,
@@ -1259,91 +1258,12 @@ class EmrServerlessStartJobOperator(BaseOperator):
         "configuration_overrides": "json",
     }

-    @property
-    def operator_extra_links(self):
-        """
-        Dynamically add extra links depending on the job type and if they're enabled.
-
-        If S3 or CloudWatch monitoring configurations exist, add links directly to the relevant consoles.
-        Only add dashboard links if they're explicitly enabled. These are one-time links that any user
-        can access, but expire on first click or one hour, whichever comes first.
-        """
-        op_extra_links = []
-
-        if isinstance(self, MappedOperator):
-            operator_class = self.operator_class
-            enable_application_ui_links = self.partial_kwargs.get(
-                "enable_application_ui_links"
-            ) or self.expand_input.value.get("enable_application_ui_links")
-            job_driver = self.partial_kwargs.get("job_driver", {}) or self.expand_input.value.get(
-                "job_driver", {}
-            )
-            configuration_overrides = self.partial_kwargs.get(
-                "configuration_overrides"
-            ) or self.expand_input.value.get("configuration_overrides")
-
-            # Configuration overrides can either be a list or a dictionary, depending on whether it's passed in as partial or expand.
-            if isinstance(configuration_overrides, list):
-                if any(
-                    [
-                        operator_class.is_monitoring_in_job_override(
-                            self=operator_class,
-                            config_key="s3MonitoringConfiguration",
-                            job_override=job_override,
-                        )
-                        for job_override in configuration_overrides
-                    ]
-                ):
-                    op_extra_links.extend([EmrServerlessS3LogsLink()])
-                if any(
-                    [
-                        operator_class.is_monitoring_in_job_override(
-                            self=operator_class,
-                            config_key="cloudWatchLoggingConfiguration",
-                            job_override=job_override,
-                        )
-                        for job_override in configuration_overrides
-                    ]
-                ):
-                    op_extra_links.extend([EmrServerlessCloudWatchLogsLink()])
-            else:
-                if operator_class.is_monitoring_in_job_override(
-                    self=operator_class,
-                    config_key="s3MonitoringConfiguration",
-                    job_override=configuration_overrides,
-                ):
-                    op_extra_links.extend([EmrServerlessS3LogsLink()])
-                if operator_class.is_monitoring_in_job_override(
-                    self=operator_class,
-                    config_key="cloudWatchLoggingConfiguration",
-                    job_override=configuration_overrides,
-                ):
-                    op_extra_links.extend([EmrServerlessCloudWatchLogsLink()])
-
-        else:
-            operator_class = self
-            enable_application_ui_links = self.enable_application_ui_links
-            configuration_overrides = self.configuration_overrides
-            job_driver = self.job_driver
-
-            if operator_class.is_monitoring_in_job_override(
-                "s3MonitoringConfiguration", configuration_overrides
-            ):
-                op_extra_links.extend([EmrServerlessS3LogsLink()])
-            if operator_class.is_monitoring_in_job_override(
-                "cloudWatchLoggingConfiguration", configuration_overrides
-            ):
-                op_extra_links.extend([EmrServerlessCloudWatchLogsLink()])
-
-        if enable_application_ui_links:
-            op_extra_links.extend([EmrServerlessDashboardLink()])
-            if isinstance(job_driver, list):
-                if any("sparkSubmit" in ind_job_driver for ind_job_driver in job_driver):
-                    op_extra_links.extend([EmrServerlessLogsLink()])
-            elif "sparkSubmit" in job_driver:
-                op_extra_links.extend([EmrServerlessLogsLink()])
-
-        return tuple(op_extra_links)
+    operator_extra_links = (
+        EmrServerlessS3LogsLink(),
+        EmrServerlessCloudWatchLogsLink(),
+        EmrServerlessDashboardLink(),
+        EmrServerlessLogsLink(),
+    )

     def __init__(
         self,
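The second hunk replaces EmrServerlessStartJobOperator's dynamic operator_extra_links property with a static class-level tuple, so all four EMR Serverless links are registered unconditionally and the MappedOperator special-casing (and its import) goes away. For context, a minimal, hedged sketch of the static extra-links pattern this relies on; the link and operator classes below are illustrative stand-ins, not the provider's real implementation:

```python
# Hedged sketch of Airflow's class-level operator_extra_links pattern.
# ExampleConsoleLink/ExampleOperator are hypothetical; import paths follow Airflow 2.x.
from airflow.models import BaseOperator, BaseOperatorLink


class ExampleConsoleLink(BaseOperatorLink):
    name = "Example Console"

    def get_link(self, operator, *, ti_key):
        # A real link would build its URL from values the task pushed to XCom.
        return "https://console.aws.amazon.com/"


class ExampleOperator(BaseOperator):
    # Declared on the class, the links are serialized with the DAG and behave
    # the same for plain tasks and for mapped (.partial()/.expand()) tasks,
    # which is what the removed property had to special-case.
    operator_extra_links = (ExampleConsoleLink(),)
```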
airflow/providers/amazon/aws/operators/glue.py

@@ -43,7 +43,8 @@ if TYPE_CHECKING:


 class GlueJobOperator(BaseOperator):
-    """Create an AWS Glue Job.
+    """
+    Create an AWS Glue Job.

     AWS Glue is a serverless Spark ETL service for running Spark Jobs on the AWS
     cloud. Language support: Python and Scala.
@@ -179,7 +180,8 @@ class GlueJobOperator(BaseOperator):
         )

     def execute(self, context: Context):
-        """Execute AWS Glue Job from Airflow.
+        """
+        Execute AWS Glue Job from Airflow.

         :return: the current Glue job ID.
         """
airflow/providers/amazon/aws/operators/glue_crawler.py

@@ -17,22 +17,22 @@
 # under the License.
 from __future__ import annotations

-from functools import cached_property
 from typing import TYPE_CHECKING, Any, Sequence

 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
+from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
 from airflow.providers.amazon.aws.triggers.glue_crawler import GlueCrawlerCompleteTrigger
 from airflow.providers.amazon.aws.utils import validate_execute_complete_event
+from airflow.providers.amazon.aws.utils.mixins import aws_template_fields

 if TYPE_CHECKING:
     from airflow.utils.context import Context

-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.glue_crawler import GlueCrawlerHook


-class GlueCrawlerOperator(BaseOperator):
+class GlueCrawlerOperator(AwsBaseOperator[GlueCrawlerHook]):
     """
     Creates, updates and triggers an AWS Glue Crawler.

@@ -45,45 +45,45 @@ class GlueCrawlerOperator(BaseOperator):
         :ref:`howto/operator:GlueCrawlerOperator`

     :param config: Configurations for the AWS Glue crawler
-    :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used. If
-        running Airflow in a distributed manner and aws_conn_id is None or
-        empty, then default boto3 configuration would be used (and must be
-        maintained on each worker node).
     :param poll_interval: Time (in seconds) to wait between two consecutive calls to check crawler status
     :param wait_for_completion: Whether to wait for crawl execution completion. (default: True)
     :param deferrable: If True, the operator will wait asynchronously for the crawl to complete.
         This implies waiting for completion. This mode requires aiobotocore module to be installed.
         (default: False)
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """

-    template_fields: Sequence[str] = ("config",)
+    aws_hook_class = GlueCrawlerHook
+
+    template_fields: Sequence[str] = aws_template_fields(
+        "config",
+    )
     ui_color = "#ededed"

     def __init__(
         self,
         config,
-        aws_conn_id="aws_default",
-        region_name: str | None = None,
         poll_interval: int = 5,
         wait_for_completion: bool = True,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ):
         super().__init__(**kwargs)
-        self.aws_conn_id = aws_conn_id
         self.poll_interval = poll_interval
         self.wait_for_completion = wait_for_completion
         self.deferrable = deferrable
-        self.region_name = region_name
         self.config = config

-    @cached_property
-    def hook(self) -> GlueCrawlerHook:
-        """Create and return a GlueCrawlerHook."""
-        return GlueCrawlerHook(self.aws_conn_id, region_name=self.region_name)
-
-    def execute(self, context: Context):
+    def execute(self, context: Context) -> str:
         """
         Execute AWS Glue Crawler from Airflow.

@@ -103,6 +103,9 @@ class GlueCrawlerOperator(BaseOperator):
                     crawler_name=crawler_name,
                     waiter_delay=self.poll_interval,
                     aws_conn_id=self.aws_conn_id,
+                    region_name=self.region_name,
+                    verify=self.verify,
+                    botocore_config=self.botocore_config,
                 ),
                 method_name="execute_complete",
             )
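GlueCrawlerOperator now derives from AwsBaseOperator[GlueCrawlerHook], so the hook, template fields and connection arguments come from the shared AWS base class instead of a hand-rolled cached_property, and the deferral trigger receives region_name, verify and botocore_config. A hedged usage sketch (task id, crawler config and connection values are placeholders):

```python
from airflow.providers.amazon.aws.operators.glue_crawler import GlueCrawlerOperator

# Hedged usage sketch; crawler config, connection id, region and retry settings
# are placeholders, not values taken from the release.
crawl_raw_data = GlueCrawlerOperator(
    task_id="crawl_raw_data",
    config={"Name": "raw-data-crawler"},  # unchanged: the crawler configuration
    aws_conn_id="aws_default",            # now inherited from AwsBaseOperator
    region_name="eu-west-1",              # now inherited from AwsBaseOperator
    verify=True,                          # new: SSL verification passed to boto3
    botocore_config={"retries": {"mode": "standard", "max_attempts": 10}},  # new: botocore client config
    wait_for_completion=True,
)
```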
airflow/providers/amazon/aws/operators/neptune.py

@@ -81,7 +81,8 @@ def handle_waitable_exception(


 class NeptuneStartDbClusterOperator(AwsBaseOperator[NeptuneHook]):
-    """Starts an Amazon Neptune DB cluster.
+    """
+    Starts an Amazon Neptune DB cluster.

     Amazon Neptune Database is a serverless graph database designed for superior scalability
     and availability. Neptune Database provides built-in security, continuous backups, and
airflow/providers/amazon/aws/operators/redshift_cluster.py

@@ -38,7 +38,8 @@ if TYPE_CHECKING:


 class RedshiftCreateClusterOperator(BaseOperator):
-    """Creates a new cluster with the specified parameters.
+    """
+    Creates a new cluster with the specified parameters.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
airflow/providers/amazon/aws/operators/s3.py

@@ -610,6 +610,7 @@ class S3FileTransformOperator(BaseOperator):
     :param dest_s3_key: The key to be written from S3. (templated)
     :param transform_script: location of the executable transformation script
     :param select_expression: S3 Select expression
+    :param select_expr_serialization_config: A dictionary that contains input and output serialization configurations for S3 Select.
     :param script_args: arguments for transformation script (templated)
     :param source_aws_conn_id: source s3 connection
     :param source_verify: Whether or not to verify SSL certificates for S3 connection.
@@ -641,6 +642,7 @@
         dest_s3_key: str,
         transform_script: str | None = None,
         select_expression=None,
+        select_expr_serialization_config: dict[str, dict[str, dict]] | None = None,
         script_args: Sequence[str] | None = None,
         source_aws_conn_id: str | None = "aws_default",
         source_verify: bool | str | None = None,
@@ -659,6 +661,7 @@
         self.replace = replace
         self.transform_script = transform_script
         self.select_expression = select_expression
+        self.select_expr_serialization_config = select_expr_serialization_config or {}
         self.script_args = script_args or []
         self.output_encoding = sys.getdefaultencoding()

@@ -678,7 +681,14 @@
             self.log.info("Dumping S3 file %s contents to local file %s", self.source_s3_key, f_source.name)

             if self.select_expression is not None:
-                content = source_s3.select_key(key=self.source_s3_key, expression=self.select_expression)
+                input_serialization = self.select_expr_serialization_config.get("input_serialization")
+                output_serialization = self.select_expr_serialization_config.get("output_serialization")
+                content = source_s3.select_key(
+                    key=self.source_s3_key,
+                    expression=self.select_expression,
+                    input_serialization=input_serialization,
+                    output_serialization=output_serialization,
+                )
                 f_source.write(content.encode("utf-8"))
             else:
                 source_s3_key_object.download_fileobj(Fileobj=f_source)
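S3FileTransformOperator gains a select_expr_serialization_config argument, so the operator can pass explicit input/output serialization to S3 Select instead of relying on the hook's defaults. A hedged sketch of how the new argument might be used; bucket and key names are placeholders, and the nested dictionaries follow boto3's SelectObjectContent InputSerialization/OutputSerialization shape:

```python
from airflow.providers.amazon.aws.operators.s3 import S3FileTransformOperator

# Hedged usage sketch; all names and values below are placeholders.
filter_csv = S3FileTransformOperator(
    task_id="filter_csv",
    source_s3_key="s3://my-source-bucket/input/data.csv",
    dest_s3_key="s3://my-dest-bucket/output/sample.csv",
    select_expression="SELECT * FROM s3object s LIMIT 100",
    select_expr_serialization_config={
        "input_serialization": {"CSV": {"FileHeaderInfo": "USE"}},
        "output_serialization": {"CSV": {}},
    },
    transform_script="/bin/cp",  # pass the selected rows through unchanged
    replace=True,
)
```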
airflow/providers/amazon/aws/operators/sagemaker.py

@@ -60,7 +60,8 @@ def serialize(result: dict) -> dict:


 class SageMakerBaseOperator(BaseOperator):
-    """This is the base operator for all SageMaker operators.
+    """
+    This is the base operator for all SageMaker operators.

     :param config: The configuration necessary to start a training job (templated)
     """
@@ -360,7 +361,7 @@ class SageMakerProcessingOperator(SageMakerBaseOperator):
             raise AirflowException(f"Error while running job: {event}")

         self.log.info(event["message"])
-        self.serialized_job = serialize(self.hook.describe_processing_job(self.config["ProcessingJobName"]))
+        self.serialized_job = serialize(self.hook.describe_processing_job(event["job_name"]))
         self.log.info("%s completed successfully.", self.task_id)
         return {"Processing": self.serialized_job}

@@ -611,12 +612,11 @@ class SageMakerEndpointOperator(SageMakerBaseOperator):

         if event["status"] != "success":
             raise AirflowException(f"Error while running job: {event}")
-        endpoint_info = self.config.get("Endpoint", self.config)
+
+        response = self.hook.describe_endpoint(event["job_name"])
         return {
-            "EndpointConfig": serialize(
-                self.hook.describe_endpoint_config(endpoint_info["EndpointConfigName"])
-            ),
-            "Endpoint": serialize(self.hook.describe_endpoint(endpoint_info["EndpointName"])),
+            "EndpointConfig": serialize(self.hook.describe_endpoint_config(response["EndpointConfigName"])),
+            "Endpoint": serialize(self.hook.describe_endpoint(response["EndpointName"])),
         }


@@ -996,9 +996,7 @@ class SageMakerTuningOperator(SageMakerBaseOperator):

         if event["status"] != "success":
             raise AirflowException(f"Error while running job: {event}")
-        return {
-            "Tuning": serialize(self.hook.describe_tuning_job(self.config["HyperParameterTuningJobName"]))
-        }
+        return {"Tuning": serialize(self.hook.describe_tuning_job(event["job_name"]))}


 class SageMakerModelOperator(SageMakerBaseOperator):
airflow/providers/amazon/aws/sensors/base_aws.py

@@ -30,7 +30,8 @@ from airflow.utils.types import NOTSET, ArgNotSet


 class AwsBaseSensor(BaseSensorOperator, AwsBaseHookMixin[AwsHookType]):
-    """Base AWS (Amazon) Sensor Class for build sensors in top of AWS Hooks.
+    """
+    Base AWS (Amazon) Sensor Class for build sensors in top of AWS Hooks.

     .. warning::
         Only for internal usage, this class might be changed, renamed or removed in the future
airflow/providers/amazon/aws/sensors/glue_catalog_partition.py

@@ -18,7 +18,6 @@
 from __future__ import annotations

 from datetime import timedelta
-from functools import cached_property
 from typing import TYPE_CHECKING, Any, Sequence

 from deprecated import deprecated
@@ -26,18 +25,23 @@ from deprecated import deprecated
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
 from airflow.providers.amazon.aws.hooks.glue_catalog import GlueCatalogHook
+from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
 from airflow.providers.amazon.aws.triggers.glue import GlueCatalogPartitionTrigger
 from airflow.providers.amazon.aws.utils import validate_execute_complete_event
-from airflow.sensors.base import BaseSensorOperator
+from airflow.providers.amazon.aws.utils.mixins import aws_template_fields

 if TYPE_CHECKING:
     from airflow.utils.context import Context


-class GlueCatalogPartitionSensor(BaseSensorOperator):
+class GlueCatalogPartitionSensor(AwsBaseSensor[GlueCatalogHook]):
     """
     Waits for a partition to show up in AWS Glue Catalog.

+    .. seealso::
+        For more information on how to use this sensor, take a look at the guide:
+        :ref:`howto/sensor:GlueCatalogPartitionSensor`
+
     :param table_name: The name of the table to wait for, supports the dot
         notation (my_database.my_table)
     :param expression: The partition clause to wait for. This is passed as
@@ -46,19 +50,27 @@ class GlueCatalogPartitionSensor(BaseSensorOperator):
         AND type='value'`` and comparison operators as in ``"ds>=2015-01-01"``.
         See https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html
         #aws-glue-api-catalog-partitions-GetPartitions
-    :param aws_conn_id: ID of the Airflow connection where
-        credentials and extra configuration are stored
-    :param region_name: Optional aws region name (example: us-east-1). Uses region from connection
-        if not specified.
     :param database_name: The name of the catalog database where the partitions reside.
     :param poke_interval: Time in seconds that the job should wait in
         between each tries
     :param deferrable: If true, then the sensor will wait asynchronously for the partition to
         show up in the AWS Glue Catalog.
         (default: False, but can be overridden in config file by setting default_deferrable to True)
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """

-    template_fields: Sequence[str] = (
+    aws_hook_class = GlueCatalogHook
+
+    template_fields: Sequence[str] = aws_template_fields(
         "database_name",
         "table_name",
         "expression",
@@ -70,19 +82,16 @@ class GlueCatalogPartitionSensor(BaseSensorOperator):
         *,
         table_name: str,
         expression: str = "ds='{{ ds }}'",
-        aws_conn_id: str | None = "aws_default",
-        region_name: str | None = None,
         database_name: str = "default",
         poke_interval: int = 60 * 3,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ):
-        super().__init__(poke_interval=poke_interval, **kwargs)
-        self.aws_conn_id = aws_conn_id
-        self.region_name = region_name
+        super().__init__(**kwargs)
         self.table_name = table_name
         self.expression = expression
         self.database_name = database_name
+        self.poke_interval = poke_interval
         self.deferrable = deferrable

     def execute(self, context: Context) -> Any:
@@ -93,7 +102,10 @@ class GlueCatalogPartitionSensor(BaseSensorOperator):
                     table_name=self.table_name,
                     expression=self.expression,
                     aws_conn_id=self.aws_conn_id,
+                    region_name=self.region_name,
                     waiter_delay=int(self.poke_interval),
+                    verify=self.verify,
+                    botocore_config=self.botocore_config,
                 ),
                 method_name="execute_complete",
                 timeout=timedelta(seconds=self.timeout),
@@ -126,7 +138,3 @@ class GlueCatalogPartitionSensor(BaseSensorOperator):
     def get_hook(self) -> GlueCatalogHook:
         """Get the GlueCatalogHook."""
         return self.hook
-
-    @cached_property
-    def hook(self) -> GlueCatalogHook:
-        return GlueCatalogHook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)
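GlueCatalogPartitionSensor follows the same migration to AwsBaseSensor[GlueCatalogHook]: the explicit aws_conn_id/region_name handling and the cached_property hook are gone, and the deferral trigger now receives region_name, verify and botocore_config. A hedged usage sketch (table, connection and region values are placeholders):

```python
from airflow.providers.amazon.aws.sensors.glue_catalog_partition import GlueCatalogPartitionSensor

# Hedged usage sketch; database/table, connection id and region are placeholders.
wait_for_partition = GlueCatalogPartitionSensor(
    task_id="wait_for_partition",
    table_name="my_database.my_table",
    expression="ds='{{ ds }}'",
    aws_conn_id="aws_default",  # now supplied by AwsBaseSensor
    region_name="us-east-1",    # now supplied by AwsBaseSensor
    poke_interval=60 * 3,
    deferrable=True,
)
```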
airflow/providers/amazon/aws/sensors/glue_crawler.py

@@ -17,20 +17,20 @@
 # under the License.
 from __future__ import annotations

-from functools import cached_property
 from typing import TYPE_CHECKING, Sequence

 from deprecated import deprecated

 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
 from airflow.providers.amazon.aws.hooks.glue_crawler import GlueCrawlerHook
-from airflow.sensors.base import BaseSensorOperator
+from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
+from airflow.providers.amazon.aws.utils.mixins import aws_template_fields

 if TYPE_CHECKING:
     from airflow.utils.context import Context


-class GlueCrawlerSensor(BaseSensorOperator):
+class GlueCrawlerSensor(AwsBaseSensor[GlueCrawlerHook]):
     """
     Waits for an AWS Glue crawler to reach any of the statuses below.

@@ -41,19 +41,27 @@ class GlueCrawlerSensor(BaseSensorOperator):
         :ref:`howto/sensor:GlueCrawlerSensor`

     :param crawler_name: The AWS Glue crawler unique name
-    :param aws_conn_id: aws connection to use, defaults to 'aws_default'
-        If this is None or empty then the default boto3 behaviour is used. If
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
         running Airflow in a distributed manner and aws_conn_id is None or
         empty, then default boto3 configuration would be used (and must be
         maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """

-    template_fields: Sequence[str] = ("crawler_name",)
+    aws_hook_class = GlueCrawlerHook

-    def __init__(self, *, crawler_name: str, aws_conn_id: str | None = "aws_default", **kwargs) -> None:
+    template_fields: Sequence[str] = aws_template_fields(
+        "crawler_name",
+    )
+
+    def __init__(self, *, crawler_name: str, **kwargs) -> None:
         super().__init__(**kwargs)
         self.crawler_name = crawler_name
-        self.aws_conn_id = aws_conn_id
         self.success_statuses = "SUCCEEDED"
         self.errored_statuses = ("FAILED", "CANCELLED")

@@ -79,7 +87,3 @@ class GlueCrawlerSensor(BaseSensorOperator):
     def get_hook(self) -> GlueCrawlerHook:
         """Return a new or pre-existing GlueCrawlerHook."""
         return self.hook
-
-    @cached_property
-    def hook(self) -> GlueCrawlerHook:
-        return GlueCrawlerHook(aws_conn_id=self.aws_conn_id)
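GlueCrawlerSensor gets the same treatment: AwsBaseSensor supplies aws_conn_id, region_name, verify and botocore_config, so the sensor's own aws_conn_id plumbing is removed. A short hedged sketch (crawler and connection names are placeholders):

```python
from airflow.providers.amazon.aws.sensors.glue_crawler import GlueCrawlerSensor

# Hedged usage sketch; crawler name, connection id and region are placeholders.
wait_for_crawl = GlueCrawlerSensor(
    task_id="wait_for_crawl",
    crawler_name="raw-data-crawler",
    aws_conn_id="aws_default",  # inherited from AwsBaseSensor
    region_name="eu-west-1",    # newly accepted via AwsBaseSensor
)
```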
airflow/providers/amazon/aws/sensors/s3.py

@@ -18,6 +18,7 @@
 from __future__ import annotations

 import fnmatch
+import inspect
 import os
 import re
 from datetime import datetime, timedelta
@@ -57,13 +58,13 @@ class S3KeySensor(BaseSensorOperator):
         refers to this bucket
     :param wildcard_match: whether the bucket_key should be interpreted as a
         Unix wildcard pattern
-    :param check_fn: Function that receives the list of the S3 objects,
+    :param check_fn: Function that receives the list of the S3 objects with the context values,
         and returns a boolean:
         - ``True``: the criteria is met
        - ``False``: the criteria isn't met
         **Example**: Wait for any S3 object size more than 1 megabyte ::

-            def check_fn(files: List) -> bool:
+            def check_fn(files: List, **kwargs) -> bool:
                 return any(f.get('Size', 0) > 1048576 for f in files)
     :param aws_conn_id: a reference to the s3 connection
     :param verify: Whether to verify SSL certificates for S3 connection.
@@ -112,7 +113,7 @@
         self.use_regex = use_regex
         self.metadata_keys = metadata_keys if metadata_keys else ["Size"]

-    def _check_key(self, key):
+    def _check_key(self, key, context: Context):
         bucket_name, key = S3Hook.get_s3_bucket_key(self.bucket_name, key, "bucket_name", "bucket_key")
         self.log.info("Poking for key : s3://%s/%s", bucket_name, key)

@@ -167,15 +168,20 @@
             files = [metadata]

         if self.check_fn is not None:
+            # For backwards compatibility, check if the function takes a context argument
+            signature = inspect.signature(self.check_fn)
+            if any(param.kind == inspect.Parameter.VAR_KEYWORD for param in signature.parameters.values()):
+                return self.check_fn(files, **context)
+            # Otherwise, just pass the files
             return self.check_fn(files)

         return True

     def poke(self, context: Context):
         if isinstance(self.bucket_key, str):
-            return self._check_key(self.bucket_key)
+            return self._check_key(self.bucket_key, context=context)
         else:
-            return all(self._check_key(key) for key in self.bucket_key)
+            return all(self._check_key(key, context=context) for key in self.bucket_key)

     def execute(self, context: Context) -> None:
         """Airflow runs this method on the worker and defers using the trigger."""
airflow/providers/amazon/aws/transfers/mongo_to_s3.py

@@ -34,7 +34,8 @@ if TYPE_CHECKING:


 class MongoToS3Operator(BaseOperator):
-    """Move data from MongoDB to S3.
+    """
+    Move data from MongoDB to S3.

     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -128,7 +129,8 @@ class MongoToS3Operator(BaseOperator):

     @staticmethod
     def _stringify(iterable: Iterable, joinable: str = "\n") -> str:
-        """Stringify an iterable of dicts.
+        """
+        Stringify an iterable of dicts.

         This dumps each dict with JSON, and joins them with ``joinable``.
         """
@@ -136,7 +138,8 @@ class MongoToS3Operator(BaseOperator):

     @staticmethod
     def transform(docs: Any) -> Any:
-        """Transform the data for transfer.
+        """
+        Transform the data for transfer.

         This method is meant to be extended by child classes to perform
         transformations unique to those operators needs. Processes pyMongo
airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py

@@ -44,7 +44,8 @@ class KeySchema(TypedDict):


 class S3ToDynamoDBOperator(BaseOperator):
-    """Load Data from S3 into a DynamoDB.
+    """
+    Load Data from S3 into a DynamoDB.

     Data stored in S3 can be uploaded to a new or existing DynamoDB. Supported file formats CSV, DynamoDB JSON and
     Amazon ION.
airflow/providers/amazon/aws/transfers/s3_to_sql.py

@@ -30,7 +30,8 @@ if TYPE_CHECKING:


 class S3ToSqlOperator(BaseOperator):
-    """Load Data from S3 into a SQL Database.
+    """
+    Load Data from S3 into a SQL Database.

     You need to provide a parser function that takes a filename as an input
     and returns an iterable of rows
airflow/providers/amazon/aws/triggers/ecs.py

@@ -179,7 +179,9 @@ class TaskDoneTrigger(BaseTrigger):
                     cluster=self.cluster, tasks=[self.task_arn], WaiterConfig={"MaxAttempts": 1}
                 )
                 # we reach this point only if the waiter met a success criteria
-                yield TriggerEvent({"status": "success", "task_arn": self.task_arn})
+                yield TriggerEvent(
+                    {"status": "success", "task_arn": self.task_arn, "cluster": self.cluster}
+                )
                 return
             except WaiterError as error:
                 if "terminal failure" in str(error):