apache-airflow-providers-amazon 8.25.0rc1__py3-none-any.whl → 8.26.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/amazon/__init__.py +1 -1
- airflow/providers/amazon/aws/hooks/athena.py +18 -9
- airflow/providers/amazon/aws/hooks/athena_sql.py +2 -1
- airflow/providers/amazon/aws/hooks/base_aws.py +34 -10
- airflow/providers/amazon/aws/hooks/chime.py +2 -1
- airflow/providers/amazon/aws/hooks/datasync.py +6 -3
- airflow/providers/amazon/aws/hooks/ecr.py +2 -1
- airflow/providers/amazon/aws/hooks/ecs.py +12 -6
- airflow/providers/amazon/aws/hooks/glacier.py +8 -4
- airflow/providers/amazon/aws/hooks/kinesis.py +2 -1
- airflow/providers/amazon/aws/hooks/logs.py +4 -2
- airflow/providers/amazon/aws/hooks/redshift_cluster.py +24 -12
- airflow/providers/amazon/aws/hooks/redshift_data.py +4 -2
- airflow/providers/amazon/aws/hooks/redshift_sql.py +6 -3
- airflow/providers/amazon/aws/hooks/s3.py +70 -53
- airflow/providers/amazon/aws/hooks/sagemaker.py +82 -41
- airflow/providers/amazon/aws/hooks/secrets_manager.py +6 -3
- airflow/providers/amazon/aws/hooks/sts.py +2 -1
- airflow/providers/amazon/aws/operators/athena.py +21 -8
- airflow/providers/amazon/aws/operators/batch.py +12 -6
- airflow/providers/amazon/aws/operators/datasync.py +2 -1
- airflow/providers/amazon/aws/operators/ecs.py +1 -0
- airflow/providers/amazon/aws/operators/emr.py +6 -86
- airflow/providers/amazon/aws/operators/glue.py +4 -2
- airflow/providers/amazon/aws/operators/glue_crawler.py +22 -19
- airflow/providers/amazon/aws/operators/neptune.py +2 -1
- airflow/providers/amazon/aws/operators/redshift_cluster.py +2 -1
- airflow/providers/amazon/aws/operators/sagemaker.py +2 -1
- airflow/providers/amazon/aws/sensors/base_aws.py +2 -1
- airflow/providers/amazon/aws/sensors/glue_catalog_partition.py +25 -17
- airflow/providers/amazon/aws/sensors/glue_crawler.py +16 -12
- airflow/providers/amazon/aws/transfers/mongo_to_s3.py +6 -3
- airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py +2 -1
- airflow/providers/amazon/aws/transfers/s3_to_sql.py +2 -1
- airflow/providers/amazon/aws/triggers/ecs.py +3 -1
- airflow/providers/amazon/aws/triggers/glue.py +15 -3
- airflow/providers/amazon/aws/triggers/glue_crawler.py +8 -1
- airflow/providers/amazon/aws/utils/connection_wrapper.py +10 -5
- airflow/providers/amazon/aws/utils/mixins.py +2 -1
- airflow/providers/amazon/aws/utils/redshift.py +2 -1
- airflow/providers/amazon/get_provider_info.py +2 -1
- {apache_airflow_providers_amazon-8.25.0rc1.dist-info → apache_airflow_providers_amazon-8.26.0rc1.dist-info}/METADATA +6 -6
- {apache_airflow_providers_amazon-8.25.0rc1.dist-info → apache_airflow_providers_amazon-8.26.0rc1.dist-info}/RECORD +45 -45
- {apache_airflow_providers_amazon-8.25.0rc1.dist-info → apache_airflow_providers_amazon-8.26.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_amazon-8.25.0rc1.dist-info → apache_airflow_providers_amazon-8.26.0rc1.dist-info}/entry_points.txt +0 -0
--- a/airflow/providers/amazon/aws/operators/athena.py
+++ b/airflow/providers/amazon/aws/operators/athena.py
@@ -175,9 +175,6 @@ class AthenaOperator(AwsBaseOperator[AthenaHook]):
                 f"query_execution_id is {self.query_execution_id}."
             )
 
-        # Save output location from API response for later use in OpenLineage.
-        self.output_location = self.hook.get_output_location(self.query_execution_id)
-
         return self.query_execution_id
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
@@ -185,6 +182,9 @@ class AthenaOperator(AwsBaseOperator[AthenaHook]):
 
         if event["status"] != "success":
             raise AirflowException(f"Error while waiting for operation on cluster to complete: {event}")
+
+        # Save query_execution_id to be later used by listeners
+        self.query_execution_id = event["value"]
         return event["value"]
 
     def on_kill(self) -> None:
@@ -208,13 +208,21 @@ class AthenaOperator(AwsBaseOperator[AthenaHook]):
                 )
             self.hook.poll_query_status(self.query_execution_id, sleep_time=self.sleep_time)
 
-    def
-        """Retrieve OpenLineage data by parsing SQL queries and enriching them with Athena API.
+    def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
+        """
+        Retrieve OpenLineage data by parsing SQL queries and enriching them with Athena API.
 
         In addition to CTAS query, query and calculation results are stored in S3 location.
-        For that reason additional output is attached with this location.
+        For that reason additional output is attached with this location. Instead of using the complete
+        path where the results are saved (user's prefix + some UUID), we are creating a dataset with the
+        user-provided path only. This should make it easier to match this dataset across different processes.
         """
-        from openlineage.client.facet import
+        from openlineage.client.facet import (
+            ExternalQueryRunFacet,
+            ExtractionError,
+            ExtractionErrorRunFacet,
+            SqlJobFacet,
+        )
         from openlineage.client.run import Dataset
 
         from airflow.providers.openlineage.extractors.base import OperatorLineage
@@ -264,6 +272,11 @@ class AthenaOperator(AwsBaseOperator[AthenaHook]):
                 )
             )
 
+        if self.query_execution_id:
+            run_facets["externalQuery"] = ExternalQueryRunFacet(
+                externalQueryId=self.query_execution_id, source="awsathena"
+            )
+
         if self.output_location:
             parsed = urlparse(self.output_location)
             outputs.append(Dataset(namespace=f"{parsed.scheme}://{parsed.netloc}", name=parsed.path or "/"))
@@ -300,7 +313,7 @@ class AthenaOperator(AwsBaseOperator[AthenaHook]):
                 )
             }
             fields = [
-                SchemaField(name=column["Name"], type=column["Type"], description=column
+                SchemaField(name=column["Name"], type=column["Type"], description=column.get("Comment"))
                 for column in table_metadata["TableMetadata"]["Columns"]
             ]
             if fields:
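With lineage collection moved to `get_openlineage_facets_on_complete`, the operator can report the real `query_execution_id` through an `ExternalQueryRunFacet`. Below is a minimal usage sketch; the DAG id, query, database, and S3 prefix are illustrative placeholders, not values taken from this diff.

# Sketch with assumed placeholder values: a plain AthenaOperator task whose OpenLineage
# facets are now built after execution, so they can include the query execution id.
from datetime import datetime

from airflow import DAG
from airflow.providers.amazon.aws.operators.athena import AthenaOperator

with DAG(dag_id="athena_lineage_example", start_date=datetime(2024, 1, 1), schedule=None):
    read_table = AthenaOperator(
        task_id="read_table",
        query="SELECT * FROM my_database.my_table LIMIT 10",  # placeholder query
        database="my_database",  # placeholder database
        output_location="s3://my-bucket/athena-results/",  # placeholder result prefix
        aws_conn_id="aws_default",
    )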
--- a/airflow/providers/amazon/aws/operators/batch.py
+++ b/airflow/providers/amazon/aws/operators/batch.py
@@ -14,7 +14,8 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
-"""AWS Batch services.
+"""
+AWS Batch services.
 
 .. seealso::
 
@@ -54,7 +55,8 @@ if TYPE_CHECKING:
 
 
 class BatchOperator(BaseOperator):
-    """Execute a job on AWS Batch.
+    """
+    Execute a job on AWS Batch.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -236,7 +238,8 @@ class BatchOperator(BaseOperator):
         )
 
     def execute(self, context: Context) -> str | None:
-        """Submit and monitor an AWS Batch job.
+        """
+        Submit and monitor an AWS Batch job.
 
         :raises: AirflowException
         """
@@ -287,7 +290,8 @@ class BatchOperator(BaseOperator):
         self.log.info("AWS Batch job (%s) terminated: %s", self.job_id, response)
 
     def submit_job(self, context: Context):
-        """Submit an AWS Batch job.
+        """
+        Submit an AWS Batch job.
 
         :raises: AirflowException
         """
@@ -342,7 +346,8 @@ class BatchOperator(BaseOperator):
         )
 
     def monitor_job(self, context: Context):
-        """Monitor an AWS Batch job.
+        """
+        Monitor an AWS Batch job.
 
         This can raise an exception or an AirflowTaskTimeout if the task was
         created with ``execution_timeout``.
@@ -434,7 +439,8 @@ class BatchOperator(BaseOperator):
 
 
 class BatchCreateComputeEnvironmentOperator(BaseOperator):
-    """Create an AWS Batch compute environment.
+    """
+    Create an AWS Batch compute environment.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
--- a/airflow/providers/amazon/aws/operators/datasync.py
+++ b/airflow/providers/amazon/aws/operators/datasync.py
@@ -34,7 +34,8 @@ if TYPE_CHECKING:
 
 
 class DataSyncOperator(AwsBaseOperator[DataSyncHook]):
-    """Find, Create, Update, Execute and Delete AWS DataSync Tasks.
+    """
+    Find, Create, Update, Execute and Delete AWS DataSync Tasks.
 
     If ``do_xcom_push`` is True, then the DataSync TaskArn and TaskExecutionArn
     which were executed will be pushed to an XCom.
--- a/airflow/providers/amazon/aws/operators/ecs.py
+++ b/airflow/providers/amazon/aws/operators/ecs.py
@@ -586,6 +586,7 @@ class EcsRunTaskOperator(EcsBaseOperator):
         if event["status"] != "success":
             raise AirflowException(f"Error in task execution: {event}")
         self.arn = event["task_arn"]  # restore arn to its updated value, needed for next steps
+        self.cluster = event["cluster"]
         self._after_execution()
         if self._aws_logs_enabled():
             # same behavior as non-deferrable mode, return last line of logs of the task.
--- a/airflow/providers/amazon/aws/operators/emr.py
+++ b/airflow/providers/amazon/aws/operators/emr.py
@@ -27,7 +27,6 @@ from uuid import uuid4
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator
-from airflow.models.mappedoperator import MappedOperator
 from airflow.providers.amazon.aws.hooks.emr import EmrContainerHook, EmrHook, EmrServerlessHook
 from airflow.providers.amazon.aws.links.emr import (
     EmrClusterLink,
@@ -1259,91 +1258,12 @@ class EmrServerlessStartJobOperator(BaseOperator):
         "configuration_overrides": "json",
     }
 
-
-
-
-
-
-
-        Only add dashboard links if they're explicitly enabled. These are one-time links that any user
-        can access, but expire on first click or one hour, whichever comes first.
-        """
-        op_extra_links = []
-
-        if isinstance(self, MappedOperator):
-            operator_class = self.operator_class
-            enable_application_ui_links = self.partial_kwargs.get(
-                "enable_application_ui_links"
-            ) or self.expand_input.value.get("enable_application_ui_links")
-            job_driver = self.partial_kwargs.get("job_driver", {}) or self.expand_input.value.get(
-                "job_driver", {}
-            )
-            configuration_overrides = self.partial_kwargs.get(
-                "configuration_overrides"
-            ) or self.expand_input.value.get("configuration_overrides")
-
-            # Configuration overrides can either be a list or a dictionary, depending on whether it's passed in as partial or expand.
-            if isinstance(configuration_overrides, list):
-                if any(
-                    [
-                        operator_class.is_monitoring_in_job_override(
-                            self=operator_class,
-                            config_key="s3MonitoringConfiguration",
-                            job_override=job_override,
-                        )
-                        for job_override in configuration_overrides
-                    ]
-                ):
-                    op_extra_links.extend([EmrServerlessS3LogsLink()])
-                if any(
-                    [
-                        operator_class.is_monitoring_in_job_override(
-                            self=operator_class,
-                            config_key="cloudWatchLoggingConfiguration",
-                            job_override=job_override,
-                        )
-                        for job_override in configuration_overrides
-                    ]
-                ):
-                    op_extra_links.extend([EmrServerlessCloudWatchLogsLink()])
-            else:
-                if operator_class.is_monitoring_in_job_override(
-                    self=operator_class,
-                    config_key="s3MonitoringConfiguration",
-                    job_override=configuration_overrides,
-                ):
-                    op_extra_links.extend([EmrServerlessS3LogsLink()])
-                if operator_class.is_monitoring_in_job_override(
-                    self=operator_class,
-                    config_key="cloudWatchLoggingConfiguration",
-                    job_override=configuration_overrides,
-                ):
-                    op_extra_links.extend([EmrServerlessCloudWatchLogsLink()])
-
-        else:
-            operator_class = self
-            enable_application_ui_links = self.enable_application_ui_links
-            configuration_overrides = self.configuration_overrides
-            job_driver = self.job_driver
-
-            if operator_class.is_monitoring_in_job_override(
-                "s3MonitoringConfiguration", configuration_overrides
-            ):
-                op_extra_links.extend([EmrServerlessS3LogsLink()])
-            if operator_class.is_monitoring_in_job_override(
-                "cloudWatchLoggingConfiguration", configuration_overrides
-            ):
-                op_extra_links.extend([EmrServerlessCloudWatchLogsLink()])
-
-        if enable_application_ui_links:
-            op_extra_links.extend([EmrServerlessDashboardLink()])
-            if isinstance(job_driver, list):
-                if any("sparkSubmit" in ind_job_driver for ind_job_driver in job_driver):
-                    op_extra_links.extend([EmrServerlessLogsLink()])
-            elif "sparkSubmit" in job_driver:
-                op_extra_links.extend([EmrServerlessLogsLink()])
-
-        return tuple(op_extra_links)
+    operator_extra_links = (
+        EmrServerlessS3LogsLink(),
+        EmrServerlessCloudWatchLogsLink(),
+        EmrServerlessDashboardLink(),
+        EmrServerlessLogsLink(),
+    )
 
     def __init__(
         self,
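The removed block computed `operator_extra_links` dynamically (including special handling for `MappedOperator`), while the new code declares the links statically on the class. As a rough illustration of that pattern, a hypothetical operator could register the same provider link classes as shown below; `MyEmrServerlessOperator` is an invented name, not part of the provider.

# Sketch only: static extra-link registration at class level, mirroring the pattern
# EmrServerlessStartJobOperator switches to in this release. The link classes are the
# real ones referenced in the diff; the operator class itself is hypothetical.
from airflow.models import BaseOperator
from airflow.providers.amazon.aws.links.emr import (
    EmrServerlessCloudWatchLogsLink,
    EmrServerlessDashboardLink,
    EmrServerlessLogsLink,
    EmrServerlessS3LogsLink,
)


class MyEmrServerlessOperator(BaseOperator):
    # Links are always declared; whether each one resolves to a URL in the UI still
    # depends on the data the task persists for it at runtime.
    operator_extra_links = (
        EmrServerlessS3LogsLink(),
        EmrServerlessCloudWatchLogsLink(),
        EmrServerlessDashboardLink(),
        EmrServerlessLogsLink(),
    )

    def execute(self, context):
        # A real operator would start the EMR Serverless job here; omitted in this sketch.
        pass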
--- a/airflow/providers/amazon/aws/operators/glue.py
+++ b/airflow/providers/amazon/aws/operators/glue.py
@@ -43,7 +43,8 @@ if TYPE_CHECKING:
 
 
 class GlueJobOperator(BaseOperator):
-    """Create an AWS Glue Job.
+    """
+    Create an AWS Glue Job.
 
     AWS Glue is a serverless Spark ETL service for running Spark Jobs on the AWS
     cloud. Language support: Python and Scala.
@@ -179,7 +180,8 @@ class GlueJobOperator(BaseOperator):
         )
 
     def execute(self, context: Context):
-        """Execute AWS Glue Job from Airflow.
+        """
+        Execute AWS Glue Job from Airflow.
 
         :return: the current Glue job ID.
         """
--- a/airflow/providers/amazon/aws/operators/glue_crawler.py
+++ b/airflow/providers/amazon/aws/operators/glue_crawler.py
@@ -17,22 +17,22 @@
 # under the License.
 from __future__ import annotations
 
-from functools import cached_property
 from typing import TYPE_CHECKING, Any, Sequence
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
+from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
 from airflow.providers.amazon.aws.triggers.glue_crawler import GlueCrawlerCompleteTrigger
 from airflow.providers.amazon.aws.utils import validate_execute_complete_event
+from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.glue_crawler import GlueCrawlerHook
 
 
-class GlueCrawlerOperator(BaseOperator):
+class GlueCrawlerOperator(AwsBaseOperator[GlueCrawlerHook]):
     """
     Creates, updates and triggers an AWS Glue Crawler.
 
@@ -45,45 +45,45 @@ class GlueCrawlerOperator(BaseOperator):
         :ref:`howto/operator:GlueCrawlerOperator`
 
     :param config: Configurations for the AWS Glue crawler
-    :param aws_conn_id: The Airflow connection used for AWS credentials.
-        If this is None or empty then the default boto3 behaviour is used. If
-        running Airflow in a distributed manner and aws_conn_id is None or
-        empty, then default boto3 configuration would be used (and must be
-        maintained on each worker node).
     :param poll_interval: Time (in seconds) to wait between two consecutive calls to check crawler status
     :param wait_for_completion: Whether to wait for crawl execution completion. (default: True)
     :param deferrable: If True, the operator will wait asynchronously for the crawl to complete.
        This implies waiting for completion. This mode requires aiobotocore module to be installed.
        (default: False)
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """
 
-
+    aws_hook_class = GlueCrawlerHook
+
+    template_fields: Sequence[str] = aws_template_fields(
+        "config",
+    )
     ui_color = "#ededed"
 
     def __init__(
         self,
         config,
-        aws_conn_id="aws_default",
-        region_name: str | None = None,
         poll_interval: int = 5,
         wait_for_completion: bool = True,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ):
         super().__init__(**kwargs)
-        self.aws_conn_id = aws_conn_id
         self.poll_interval = poll_interval
         self.wait_for_completion = wait_for_completion
         self.deferrable = deferrable
-        self.region_name = region_name
         self.config = config
 
-
-    def hook(self) -> GlueCrawlerHook:
-        """Create and return a GlueCrawlerHook."""
-        return GlueCrawlerHook(self.aws_conn_id, region_name=self.region_name)
-
-    def execute(self, context: Context):
+    def execute(self, context: Context) -> str:
         """
         Execute AWS Glue Crawler from Airflow.
 
@@ -103,6 +103,9 @@ class GlueCrawlerOperator(BaseOperator):
                     crawler_name=crawler_name,
                     waiter_delay=self.poll_interval,
                     aws_conn_id=self.aws_conn_id,
+                    region_name=self.region_name,
+                    verify=self.verify,
+                    botocore_config=self.botocore_config,
                 ),
                 method_name="execute_complete",
             )
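Because `GlueCrawlerOperator` now derives from `AwsBaseOperator[GlueCrawlerHook]`, the common AWS arguments (`aws_conn_id`, `region_name`, `verify`, `botocore_config`) are handled by the base class and forwarded to the deferral trigger. A minimal usage sketch follows; the crawler config, region, and retry settings are placeholders.

# Sketch with assumed placeholder values: the new-style GlueCrawlerOperator taking the
# common AWS arguments documented in the hunk above.
from airflow.providers.amazon.aws.operators.glue_crawler import GlueCrawlerOperator

run_crawler = GlueCrawlerOperator(
    task_id="run_crawler",
    config={"Name": "my-crawler"},  # placeholder crawler configuration
    aws_conn_id="aws_default",
    region_name="us-east-1",  # placeholder region
    wait_for_completion=True,
    botocore_config={"retries": {"mode": "standard"}},  # placeholder botocore options
)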
--- a/airflow/providers/amazon/aws/operators/neptune.py
+++ b/airflow/providers/amazon/aws/operators/neptune.py
@@ -81,7 +81,8 @@ def handle_waitable_exception(
 
 
 class NeptuneStartDbClusterOperator(AwsBaseOperator[NeptuneHook]):
-    """Starts an Amazon Neptune DB cluster.
+    """
+    Starts an Amazon Neptune DB cluster.
 
     Amazon Neptune Database is a serverless graph database designed for superior scalability
     and availability. Neptune Database provides built-in security, continuous backups, and
--- a/airflow/providers/amazon/aws/operators/redshift_cluster.py
+++ b/airflow/providers/amazon/aws/operators/redshift_cluster.py
@@ -38,7 +38,8 @@ if TYPE_CHECKING:
 
 
 class RedshiftCreateClusterOperator(BaseOperator):
-    """Creates a new cluster with the specified parameters.
+    """
+    Creates a new cluster with the specified parameters.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
--- a/airflow/providers/amazon/aws/operators/sagemaker.py
+++ b/airflow/providers/amazon/aws/operators/sagemaker.py
@@ -60,7 +60,8 @@ def serialize(result: dict) -> dict:
 
 
 class SageMakerBaseOperator(BaseOperator):
-    """This is the base operator for all SageMaker operators.
+    """
+    This is the base operator for all SageMaker operators.
 
     :param config: The configuration necessary to start a training job (templated)
     """
--- a/airflow/providers/amazon/aws/sensors/base_aws.py
+++ b/airflow/providers/amazon/aws/sensors/base_aws.py
@@ -30,7 +30,8 @@ from airflow.utils.types import NOTSET, ArgNotSet
 
 
 class AwsBaseSensor(BaseSensorOperator, AwsBaseHookMixin[AwsHookType]):
-    """Base AWS (Amazon) Sensor Class for build sensors in top of AWS Hooks.
+    """
+    Base AWS (Amazon) Sensor Class for build sensors in top of AWS Hooks.
 
     .. warning::
         Only for internal usage, this class might be changed, renamed or removed in the future
--- a/airflow/providers/amazon/aws/sensors/glue_catalog_partition.py
+++ b/airflow/providers/amazon/aws/sensors/glue_catalog_partition.py
@@ -18,7 +18,6 @@
 from __future__ import annotations
 
 from datetime import timedelta
-from functools import cached_property
 from typing import TYPE_CHECKING, Any, Sequence
 
 from deprecated import deprecated
@@ -26,18 +25,23 @@ from deprecated import deprecated
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
 from airflow.providers.amazon.aws.hooks.glue_catalog import GlueCatalogHook
+from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
 from airflow.providers.amazon.aws.triggers.glue import GlueCatalogPartitionTrigger
 from airflow.providers.amazon.aws.utils import validate_execute_complete_event
-from airflow.
+from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
 
 
-class GlueCatalogPartitionSensor(BaseSensorOperator):
+class GlueCatalogPartitionSensor(AwsBaseSensor[GlueCatalogHook]):
     """
     Waits for a partition to show up in AWS Glue Catalog.
 
+    .. seealso::
+        For more information on how to use this sensor, take a look at the guide:
+        :ref:`howto/sensor:GlueCatalogPartitionSensor`
+
     :param table_name: The name of the table to wait for, supports the dot
         notation (my_database.my_table)
     :param expression: The partition clause to wait for. This is passed as
@@ -46,19 +50,27 @@ class GlueCatalogPartitionSensor(BaseSensorOperator):
         AND type='value'`` and comparison operators as in ``"ds>=2015-01-01"``.
         See https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html
         #aws-glue-api-catalog-partitions-GetPartitions
-    :param aws_conn_id: ID of the Airflow connection where
-        credentials and extra configuration are stored
-    :param region_name: Optional aws region name (example: us-east-1). Uses region from connection
-        if not specified.
     :param database_name: The name of the catalog database where the partitions reside.
     :param poke_interval: Time in seconds that the job should wait in
         between each tries
     :param deferrable: If true, then the sensor will wait asynchronously for the partition to
         show up in the AWS Glue Catalog.
         (default: False, but can be overridden in config file by setting default_deferrable to True)
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """
 
-
+    aws_hook_class = GlueCatalogHook
+
+    template_fields: Sequence[str] = aws_template_fields(
         "database_name",
         "table_name",
         "expression",
@@ -70,19 +82,16 @@ class GlueCatalogPartitionSensor(BaseSensorOperator):
         *,
         table_name: str,
         expression: str = "ds='{{ ds }}'",
-        aws_conn_id: str | None = "aws_default",
-        region_name: str | None = None,
         database_name: str = "default",
         poke_interval: int = 60 * 3,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs,
     ):
-        super().__init__(
-        self.aws_conn_id = aws_conn_id
-        self.region_name = region_name
+        super().__init__(**kwargs)
         self.table_name = table_name
         self.expression = expression
         self.database_name = database_name
+        self.poke_interval = poke_interval
         self.deferrable = deferrable
 
     def execute(self, context: Context) -> Any:
@@ -93,7 +102,10 @@ class GlueCatalogPartitionSensor(BaseSensorOperator):
                     table_name=self.table_name,
                     expression=self.expression,
                     aws_conn_id=self.aws_conn_id,
+                    region_name=self.region_name,
                     waiter_delay=int(self.poke_interval),
+                    verify=self.verify,
+                    botocore_config=self.botocore_config,
                 ),
                 method_name="execute_complete",
                 timeout=timedelta(seconds=self.timeout),
@@ -126,7 +138,3 @@ class GlueCatalogPartitionSensor(BaseSensorOperator):
     def get_hook(self) -> GlueCatalogHook:
         """Get the GlueCatalogHook."""
         return self.hook
-
-    @cached_property
-    def hook(self) -> GlueCatalogHook:
-        return GlueCatalogHook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)
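The sensor follows the same migration to `AwsBaseSensor[GlueCatalogHook]`, so region, TLS verification, and botocore options can be passed directly and are forwarded to `GlueCatalogPartitionTrigger` in deferrable mode. A usage sketch with placeholder database, table, and region values:

# Sketch with assumed placeholder values: GlueCatalogPartitionSensor after the move to
# AwsBaseSensor, waiting (deferred) for a daily partition keyed on the logical date.
from airflow.providers.amazon.aws.sensors.glue_catalog_partition import GlueCatalogPartitionSensor

wait_for_partition = GlueCatalogPartitionSensor(
    task_id="wait_for_partition",
    database_name="my_database",  # placeholder database
    table_name="my_table",  # placeholder table
    expression="ds='{{ ds }}'",
    aws_conn_id="aws_default",
    region_name="eu-west-1",  # placeholder region
    poke_interval=120,
    deferrable=True,
)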
--- a/airflow/providers/amazon/aws/sensors/glue_crawler.py
+++ b/airflow/providers/amazon/aws/sensors/glue_crawler.py
@@ -17,20 +17,20 @@
 # under the License.
 from __future__ import annotations
 
-from functools import cached_property
 from typing import TYPE_CHECKING, Sequence
 
 from deprecated import deprecated
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
 from airflow.providers.amazon.aws.hooks.glue_crawler import GlueCrawlerHook
-from airflow.sensors.
+from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
+from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
 
 
-class GlueCrawlerSensor(BaseSensorOperator):
+class GlueCrawlerSensor(AwsBaseSensor[GlueCrawlerHook]):
     """
     Waits for an AWS Glue crawler to reach any of the statuses below.
 
@@ -41,19 +41,27 @@ class GlueCrawlerSensor(BaseSensorOperator):
         :ref:`howto/sensor:GlueCrawlerSensor`
 
     :param crawler_name: The AWS Glue crawler unique name
-    :param aws_conn_id:
-        If this is None or empty then the default boto3 behaviour is used. If
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
         running Airflow in a distributed manner and aws_conn_id is None or
         empty, then default boto3 configuration would be used (and must be
        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """
 
-
+    aws_hook_class = GlueCrawlerHook
 
-
+    template_fields: Sequence[str] = aws_template_fields(
+        "crawler_name",
+    )
+
+    def __init__(self, *, crawler_name: str, **kwargs) -> None:
         super().__init__(**kwargs)
         self.crawler_name = crawler_name
-        self.aws_conn_id = aws_conn_id
         self.success_statuses = "SUCCEEDED"
         self.errored_statuses = ("FAILED", "CANCELLED")
 
@@ -79,7 +87,3 @@ class GlueCrawlerSensor(BaseSensorOperator):
     def get_hook(self) -> GlueCrawlerHook:
         """Return a new or pre-existing GlueCrawlerHook."""
         return self.hook
-
-    @cached_property
-    def hook(self) -> GlueCrawlerHook:
-        return GlueCrawlerHook(aws_conn_id=self.aws_conn_id)
--- a/airflow/providers/amazon/aws/transfers/mongo_to_s3.py
+++ b/airflow/providers/amazon/aws/transfers/mongo_to_s3.py
@@ -34,7 +34,8 @@ if TYPE_CHECKING:
 
 
 class MongoToS3Operator(BaseOperator):
-    """Move data from MongoDB to S3.
+    """
+    Move data from MongoDB to S3.
 
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
@@ -128,7 +129,8 @@ class MongoToS3Operator(BaseOperator):
 
     @staticmethod
     def _stringify(iterable: Iterable, joinable: str = "\n") -> str:
-        """Stringify an iterable of dicts.
+        """
+        Stringify an iterable of dicts.
 
         This dumps each dict with JSON, and joins them with ``joinable``.
         """
@@ -136,7 +138,8 @@ class MongoToS3Operator(BaseOperator):
 
     @staticmethod
     def transform(docs: Any) -> Any:
-        """Transform the data for transfer.
+        """
+        Transform the data for transfer.
 
         This method is meant to be extended by child classes to perform
         transformations unique to those operators needs. Processes pyMongo
--- a/airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py
+++ b/airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py
@@ -44,7 +44,8 @@ class KeySchema(TypedDict):
 
 
 class S3ToDynamoDBOperator(BaseOperator):
-    """Load Data from S3 into a DynamoDB.
+    """
+    Load Data from S3 into a DynamoDB.
 
     Data stored in S3 can be uploaded to a new or existing DynamoDB. Supported file formats CSV, DynamoDB JSON and
     Amazon ION.
--- a/airflow/providers/amazon/aws/transfers/s3_to_sql.py
+++ b/airflow/providers/amazon/aws/transfers/s3_to_sql.py
@@ -30,7 +30,8 @@ if TYPE_CHECKING:
 
 
 class S3ToSqlOperator(BaseOperator):
-    """Load Data from S3 into a SQL Database.
+    """
+    Load Data from S3 into a SQL Database.
 
     You need to provide a parser function that takes a filename as an input
     and returns an iterable of rows
--- a/airflow/providers/amazon/aws/triggers/ecs.py
+++ b/airflow/providers/amazon/aws/triggers/ecs.py
@@ -179,7 +179,9 @@ class TaskDoneTrigger(BaseTrigger):
                         cluster=self.cluster, tasks=[self.task_arn], WaiterConfig={"MaxAttempts": 1}
                     )
                     # we reach this point only if the waiter met a success criteria
-                    yield TriggerEvent(
+                    yield TriggerEvent(
+                        {"status": "success", "task_arn": self.task_arn, "cluster": self.cluster}
+                    )
                     return
                 except WaiterError as error:
                     if "terminal failure" in str(error):
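`TaskDoneTrigger` now includes the cluster in its success event, which is what lets the deferred `EcsRunTaskOperator.execute_complete` restore `self.cluster` (see the operators/ecs.py hunk above). Below is a small sketch of the event shape and how deferred-completion code can read it; the ARN and cluster name are placeholders.

# Sketch: the success payload TaskDoneTrigger now yields, and how execute_complete-style
# code can read it. Values are placeholders, not real resources.
from airflow.triggers.base import TriggerEvent

event = TriggerEvent(
    {"status": "success", "task_arn": "arn:aws:ecs:us-east-1:111122223333:task/example", "cluster": "example-cluster"}
)

payload = event.payload
if payload["status"] == "success":
    task_arn = payload["task_arn"]
    cluster = payload["cluster"]
    print(f"Task {task_arn} finished on cluster {cluster}")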