apache-airflow-providers-amazon 8.17.0rc2__py3-none-any.whl → 8.18.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/amazon/__init__.py +1 -1
- airflow/providers/amazon/aws/auth_manager/cli/avp_commands.py +3 -3
- airflow/providers/amazon/aws/auth_manager/cli/definition.py +14 -0
- airflow/providers/amazon/aws/auth_manager/cli/idc_commands.py +148 -0
- airflow/providers/amazon/aws/auth_manager/views/auth.py +1 -1
- airflow/providers/amazon/aws/executors/ecs/Dockerfile +3 -3
- airflow/providers/amazon/aws/executors/ecs/boto_schema.py +1 -1
- airflow/providers/amazon/aws/executors/ecs/ecs_executor.py +40 -17
- airflow/providers/amazon/aws/executors/ecs/utils.py +9 -7
- airflow/providers/amazon/aws/executors/utils/exponential_backoff_retry.py +23 -4
- airflow/providers/amazon/aws/hooks/athena.py +15 -2
- airflow/providers/amazon/aws/hooks/base_aws.py +16 -14
- airflow/providers/amazon/aws/hooks/emr.py +6 -0
- airflow/providers/amazon/aws/hooks/logs.py +85 -1
- airflow/providers/amazon/aws/hooks/neptune.py +85 -0
- airflow/providers/amazon/aws/hooks/quicksight.py +9 -8
- airflow/providers/amazon/aws/hooks/redshift_cluster.py +8 -7
- airflow/providers/amazon/aws/hooks/redshift_sql.py +3 -3
- airflow/providers/amazon/aws/hooks/s3.py +4 -6
- airflow/providers/amazon/aws/hooks/sagemaker.py +136 -9
- airflow/providers/amazon/aws/links/emr.py +122 -2
- airflow/providers/amazon/aws/log/cloudwatch_task_handler.py +2 -2
- airflow/providers/amazon/aws/operators/athena.py +4 -1
- airflow/providers/amazon/aws/operators/batch.py +5 -6
- airflow/providers/amazon/aws/operators/ecs.py +6 -2
- airflow/providers/amazon/aws/operators/eks.py +31 -26
- airflow/providers/amazon/aws/operators/emr.py +192 -26
- airflow/providers/amazon/aws/operators/glue.py +5 -2
- airflow/providers/amazon/aws/operators/glue_crawler.py +5 -2
- airflow/providers/amazon/aws/operators/glue_databrew.py +5 -2
- airflow/providers/amazon/aws/operators/lambda_function.py +3 -0
- airflow/providers/amazon/aws/operators/neptune.py +218 -0
- airflow/providers/amazon/aws/operators/rds.py +21 -12
- airflow/providers/amazon/aws/operators/redshift_cluster.py +12 -18
- airflow/providers/amazon/aws/operators/redshift_data.py +2 -4
- airflow/providers/amazon/aws/operators/sagemaker.py +94 -31
- airflow/providers/amazon/aws/operators/step_function.py +4 -1
- airflow/providers/amazon/aws/sensors/batch.py +2 -2
- airflow/providers/amazon/aws/sensors/ec2.py +4 -2
- airflow/providers/amazon/aws/sensors/emr.py +13 -6
- airflow/providers/amazon/aws/sensors/glue_catalog_partition.py +4 -1
- airflow/providers/amazon/aws/sensors/quicksight.py +17 -14
- airflow/providers/amazon/aws/sensors/redshift_cluster.py +2 -4
- airflow/providers/amazon/aws/sensors/s3.py +3 -0
- airflow/providers/amazon/aws/sensors/sqs.py +4 -1
- airflow/providers/amazon/aws/transfers/s3_to_redshift.py +1 -0
- airflow/providers/amazon/aws/transfers/sql_to_s3.py +31 -3
- airflow/providers/amazon/aws/triggers/neptune.py +115 -0
- airflow/providers/amazon/aws/triggers/rds.py +9 -7
- airflow/providers/amazon/aws/triggers/redshift_cluster.py +2 -2
- airflow/providers/amazon/aws/triggers/redshift_data.py +1 -1
- airflow/providers/amazon/aws/triggers/sagemaker.py +82 -1
- airflow/providers/amazon/aws/utils/__init__.py +10 -0
- airflow/providers/amazon/aws/utils/connection_wrapper.py +12 -8
- airflow/providers/amazon/aws/utils/mixins.py +5 -1
- airflow/providers/amazon/aws/utils/task_log_fetcher.py +2 -2
- airflow/providers/amazon/aws/waiters/neptune.json +85 -0
- airflow/providers/amazon/get_provider_info.py +26 -2
- {apache_airflow_providers_amazon-8.17.0rc2.dist-info → apache_airflow_providers_amazon-8.18.0rc2.dist-info}/METADATA +6 -6
- {apache_airflow_providers_amazon-8.17.0rc2.dist-info → apache_airflow_providers_amazon-8.18.0rc2.dist-info}/RECORD +62 -57
- {apache_airflow_providers_amazon-8.17.0rc2.dist-info → apache_airflow_providers_amazon-8.18.0rc2.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_amazon-8.17.0rc2.dist-info → apache_airflow_providers_amazon-8.18.0rc2.dist-info}/entry_points.txt +0 -0
airflow/providers/amazon/aws/hooks/logs.py

@@ -17,8 +17,11 @@
 # under the License.
 from __future__ import annotations
 
+import asyncio
 import warnings
-from typing import Generator
+from typing import Any, AsyncGenerator, Generator
+
+from botocore.exceptions import ClientError
 
 from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
@@ -151,3 +154,84 @@ class AwsLogsHook(AwsBaseHook):
                     num_consecutive_empty_response = 0
 
             continuation_token.value = response["nextForwardToken"]
+
+    async def describe_log_streams_async(
+        self, log_group: str, stream_prefix: str, order_by: str, count: int
+    ) -> dict[str, Any] | None:
+        """Async function to get the list of log streams for the specified log group.
+
+        You can list all the log streams or filter the results by prefix. You can also control
+        how the results are ordered.
+
+        :param log_group: The name of the log group.
+        :param stream_prefix: The prefix to match.
+        :param order_by: If the value is LogStreamName, the results are ordered by log stream name.
+            If the value is LastEventTime, the results are ordered by the event time. The default value is LogStreamName.
+        :param count: The maximum number of items returned
+        """
+        async with self.async_conn as client:
+            try:
+                response: dict[str, Any] = await client.describe_log_streams(
+                    logGroupName=log_group,
+                    logStreamNamePrefix=stream_prefix,
+                    orderBy=order_by,
+                    limit=count,
+                )
+                return response
+            except ClientError as error:
+                # On the very first training job run on an account, there's no log group until
+                # the container starts logging, so ignore any errors thrown about that
+                if error.response["Error"]["Code"] == "ResourceNotFoundException":
+                    return None
+                raise error
+
+    async def get_log_events_async(
+        self,
+        log_group: str,
+        log_stream_name: str,
+        start_time: int = 0,
+        skip: int = 0,
+        start_from_head: bool = True,
+    ) -> AsyncGenerator[Any, dict[str, Any]]:
+        """Yield all the available items in a single log stream.
+
+        :param log_group: The name of the log group.
+        :param log_stream_name: The name of the specific stream.
+        :param start_time: The time stamp value to start reading the logs from (default: 0).
+        :param skip: The number of log entries to skip at the start (default: 0).
+            This is for when there are multiple entries at the same timestamp.
+        :param start_from_head: whether to start from the beginning (True) of the log or
+            at the end of the log (False).
+        """
+        next_token = None
+        while True:
+            if next_token is not None:
+                token_arg: dict[str, str] = {"nextToken": next_token}
+            else:
+                token_arg = {}
+
+            async with self.async_conn as client:
+                response = await client.get_log_events(
+                    logGroupName=log_group,
+                    logStreamName=log_stream_name,
+                    startTime=start_time,
+                    startFromHead=start_from_head,
+                    **token_arg,
+                )
+
+                events = response["events"]
+                event_count = len(events)
+
+                if event_count > skip:
+                    events = events[skip:]
+                    skip = 0
+                else:
+                    skip -= event_count
+                    events = []
+
+                for event in events:
+                    await asyncio.sleep(1)
+                    yield event
+
+                if next_token != response["nextForwardToken"]:
+                    next_token = response["nextForwardToken"]
airflow/providers/amazon/aws/hooks/neptune.py

@@ -0,0 +1,85 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
+
+
+class NeptuneHook(AwsBaseHook):
+    """
+    Interact with Amazon Neptune.
+
+    Additional arguments (such as ``aws_conn_id``) may be specified and
+    are passed down to the underlying AwsBaseHook.
+
+    .. seealso::
+        - :class:`~airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook`
+    """
+
+    AVAILABLE_STATES = ["available"]
+    STOPPED_STATES = ["stopped"]
+
+    def __init__(self, *args, **kwargs):
+        kwargs["client_type"] = "neptune"
+        super().__init__(*args, **kwargs)
+
+    def wait_for_cluster_availability(self, cluster_id: str, delay: int = 30, max_attempts: int = 60) -> str:
+        """
+        Wait for Neptune cluster to start.
+
+        :param cluster_id: The ID of the cluster to wait for.
+        :param delay: Time in seconds to delay between polls.
+        :param max_attempts: Maximum number of attempts to poll for completion.
+        :return: The status of the cluster.
+        """
+        self.get_waiter("cluster_available").wait(
+            DBClusterIdentifier=cluster_id, WaiterConfig={"Delay": delay, "MaxAttempts": max_attempts}
+        )
+
+        status = self.get_cluster_status(cluster_id)
+        self.log.info("Finished waiting for cluster %s. Status is now %s", cluster_id, status)
+
+        return status
+
+    def wait_for_cluster_stopped(self, cluster_id: str, delay: int = 30, max_attempts: int = 60) -> str:
+        """
+        Wait for Neptune cluster to stop.
+
+        :param cluster_id: The ID of the cluster to wait for.
+        :param delay: Time in seconds to delay between polls.
+        :param max_attempts: Maximum number of attempts to poll for completion.
+        :return: The status of the cluster.
+        """
+        self.get_waiter("cluster_stopped").wait(
+            DBClusterIdentifier=cluster_id, WaiterConfig={"Delay": delay, "MaxAttempts": max_attempts}
+        )
+
+        status = self.get_cluster_status(cluster_id)
+        self.log.info("Finished waiting for cluster %s. Status is now %s", cluster_id, status)
+
+        return status
+
+    def get_cluster_status(self, cluster_id: str) -> str:
+        """
+        Get the status of a Neptune cluster.
+
+        :param cluster_id: The ID of the cluster to get the status of.
+        :return: The status of the cluster.
+        """
+        return self.get_conn().describe_db_clusters(DBClusterIdentifier=cluster_id)["DBClusters"][0]["Status"]
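
A rough usage sketch of the new hook. The connection ID and cluster identifier are placeholders, and the start/stop request itself would normally come from the new Neptune operators added in this release:

    from airflow.providers.amazon.aws.hooks.neptune import NeptuneHook

    # Placeholder connection ID and cluster identifier.
    hook = NeptuneHook(aws_conn_id="aws_default")

    status = hook.get_cluster_status(cluster_id="my-neptune-cluster")
    print(f"Current status: {status}")

    # After a start request has been issued elsewhere, block until the custom
    # "cluster_available" waiter reports the cluster as available.
    if status not in NeptuneHook.AVAILABLE_STATES:
        status = hook.wait_for_cluster_availability(cluster_id="my-neptune-cluster", delay=30, max_attempts=60)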
airflow/providers/amazon/aws/hooks/quicksight.py

@@ -18,10 +18,10 @@
 from __future__ import annotations
 
 import time
-import warnings
 from functools import cached_property
 
 from botocore.exceptions import ClientError
+from deprecated import deprecated
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
@@ -172,14 +172,15 @@ class QuickSightHook(AwsBaseHook):
         return status
 
     @cached_property
-
-
-
+    @deprecated(
+        reason=(
+            "`QuickSightHook.sts_hook` property is deprecated and will be removed in the future. "
            "This property used for obtain AWS Account ID, "
-
-
-
-
+            "please consider to use `QuickSightHook.account_id` instead"
+        ),
+        category=AirflowProviderDeprecationWarning,
+    )
+    def sts_hook(self):
         from airflow.providers.amazon.aws.hooks.sts import StsHook
 
         return StsHook(aws_conn_id=self.aws_conn_id)
airflow/providers/amazon/aws/hooks/redshift_cluster.py

@@ -17,10 +17,10 @@
 from __future__ import annotations
 
 import asyncio
-import warnings
 from typing import Any, Sequence
 
 import botocore.exceptions
+from deprecated import deprecated
 
 from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseAsyncHook, AwsBaseHook
@@ -195,16 +195,17 @@ class RedshiftHook(AwsBaseHook):
         return None
 
 
+@deprecated(
+    reason=(
+        "`airflow.providers.amazon.aws.hook.base_aws.RedshiftAsyncHook` "
+        "has been deprecated and will be removed in future"
+    ),
+    category=AirflowProviderDeprecationWarning,
+)
 class RedshiftAsyncHook(AwsBaseAsyncHook):
     """Interact with AWS Redshift using aiobotocore library."""
 
     def __init__(self, *args, **kwargs):
-        warnings.warn(
-            "airflow.providers.amazon.aws.hook.base_aws.RedshiftAsyncHook has been deprecated and "
-            "will be removed in future",
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
         kwargs["client_type"] = "redshift"
         super().__init__(*args, **kwargs)
 
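
Both hooks above replace in-body `warnings.warn(...)` calls with the `deprecated` decorator from the Deprecated library. A minimal sketch of that pattern; the class name and message here are illustrative only, not code from the provider:

    from deprecated import deprecated

    from airflow.exceptions import AirflowProviderDeprecationWarning


    @deprecated(
        reason="`OldHook` has been deprecated and will be removed in future; use `NewHook` instead",
        category=AirflowProviderDeprecationWarning,
    )
    class OldHook:
        pass


    OldHook()  # instantiating the decorated class (or calling a decorated function) emits the warning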
airflow/providers/amazon/aws/hooks/redshift_sql.py

@@ -200,7 +200,7 @@ class RedshiftSQLHook(DbApiHook):
         return redshift_connector.connect(**conn_kwargs)
 
     def get_openlineage_database_info(self, connection: Connection) -> DatabaseInfo:
-        """
+        """Return Redshift specific information for OpenLineage."""
         from airflow.providers.openlineage.sqlparser import DatabaseInfo
 
         authority = self._get_openlineage_redshift_authority_part(connection)
@@ -252,9 +252,9 @@
         return hostname
 
     def get_openlineage_database_dialect(self, connection: Connection) -> str:
-        """
+        """Return redshift dialect."""
         return "redshift"
 
     def get_openlineage_default_schema(self) -> str | None:
-        """
+        """Return current schema. This is usually changed with ``SEARCH_PATH`` parameter."""
         return self.get_first("SELECT CURRENT_SCHEMA();")[0]
airflow/providers/amazon/aws/hooks/s3.py

@@ -1369,6 +1369,10 @@ class S3Hook(AwsBaseHook):
         """
         Download a file from the S3 location to the local file system.
 
+        Note:
+            This function shadows the 'download_file' method of S3 API, but it is not the same.
+            If you want to use the original method from S3 API, please use 'S3Hook.get_conn().download_file()'
+
         .. seealso::
             - :external+boto3:py:meth:`S3.Object.download_fileobj`
 
@@ -1386,12 +1390,6 @@
             Default: True.
         :return: the file name.
         """
-        self.log.info(
-            "This function shadows the 'download_file' method of S3 API, but it is not the same. If you "
-            "want to use the original method from S3 API, please call "
-            "'S3Hook.get_conn().download_file()'"
-        )
-
         self.log.info("Downloading source S3 file from Bucket %s with path %s", bucket_name, key)
 
         try:
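
The shadowing note moves from a runtime log message into the docstring. The distinction it describes, sketched with placeholder bucket and key names:

    from airflow.providers.amazon.aws.hooks.s3 import S3Hook

    hook = S3Hook(aws_conn_id="aws_default")

    # Airflow helper: downloads the object to a local (by default auto-generated) path and returns that file name.
    local_file = hook.download_file(key="data/report.csv", bucket_name="my-bucket")

    # Underlying boto3 S3 client method, for callers who want the original S3 API behaviour.
    hook.get_conn().download_file(Bucket="my-bucket", Key="data/report.csv", Filename="/tmp/report.csv")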
airflow/providers/amazon/aws/hooks/sagemaker.py

@@ -26,8 +26,9 @@ import warnings
 from collections import Counter, namedtuple
 from datetime import datetime
 from functools import partial
-from typing import Any, Callable, Generator, cast
+from typing import Any, AsyncGenerator, Callable, Generator, cast
 
+from asgiref.sync import sync_to_async
 from botocore.exceptions import ClientError
 
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -310,10 +311,12 @@
             max_ingestion_time,
         )
 
-
-        describe_response["
-
-
+        billable_seconds = SageMakerHook.count_billable_seconds(
+            training_start_time=describe_response["TrainingStartTime"],
+            training_end_time=describe_response["TrainingEndTime"],
+            instance_count=describe_response["ResourceConfig"]["InstanceCount"],
+        )
+        self.log.info("Billable seconds: %d", billable_seconds)
 
         return response
 
@@ -811,10 +814,12 @@
         if status in failed_states:
             reason = last_description.get("FailureReason", "(No reason provided)")
             raise AirflowException(f"Error training {job_name}: {status} Reason: {reason}")
-
-        last_description["
-
-
+        billable_seconds = SageMakerHook.count_billable_seconds(
+            training_start_time=last_description["TrainingStartTime"],
+            training_end_time=last_description["TrainingEndTime"],
+            instance_count=instance_count,
+        )
+        self.log.info("Billable seconds: %d", billable_seconds)
 
     def list_training_jobs(
         self, name_contains: str | None = None, max_results: int | None = None, **kwargs
@@ -1300,3 +1305,125 @@
         if "BestCandidate" in res:
             return res["BestCandidate"]
         return None
+
+    @staticmethod
+    def count_billable_seconds(
+        training_start_time: datetime, training_end_time: datetime, instance_count: int
+    ) -> int:
+        billable_time = (training_end_time - training_start_time) * instance_count
+        return int(billable_time.total_seconds()) + 1
+
+    async def describe_training_job_async(self, job_name: str) -> dict[str, Any]:
+        """
+        Return the training job info associated with the name.
+
+        :param job_name: the name of the training job
+        """
+        async with self.async_conn as client:
+            response: dict[str, Any] = await client.describe_training_job(TrainingJobName=job_name)
+            return response
+
+    async def describe_training_job_with_log_async(
+        self,
+        job_name: str,
+        positions: dict[str, Any],
+        stream_names: list[str],
+        instance_count: int,
+        state: int,
+        last_description: dict[str, Any],
+        last_describe_job_call: float,
+    ) -> tuple[int, dict[str, Any], float]:
+        """
+        Return the training job info associated with job_name and print CloudWatch logs.
+
+        :param job_name: name of the job to check status
+        :param positions: A list of pairs of (timestamp, skip) which represents the last record
+            read from each stream.
+        :param stream_names: A list of the log stream names. The position of the stream in this list is
+            the stream number.
+        :param instance_count: Count of the instance created for the job initially
+        :param state: log state
+        :param last_description: Latest description of the training job
+        :param last_describe_job_call: previous job called time
+        """
+        log_group = "/aws/sagemaker/TrainingJobs"
+
+        if len(stream_names) < instance_count:
+            logs_hook = AwsLogsHook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)
+            streams = await logs_hook.describe_log_streams_async(
+                log_group=log_group,
+                stream_prefix=job_name + "/",
+                order_by="LogStreamName",
+                count=instance_count,
+            )
+
+            stream_names = [s["logStreamName"] for s in streams["logStreams"]] if streams else []
+            positions.update([(s, Position(timestamp=0, skip=0)) for s in stream_names if s not in positions])
+
+        if len(stream_names) > 0:
+            async for idx, event in self.get_multi_stream(log_group, stream_names, positions):
+                self.log.info(event["message"])
+                ts, count = positions[stream_names[idx]]
+                if event["timestamp"] == ts:
+                    positions[stream_names[idx]] = Position(timestamp=ts, skip=count + 1)
+                else:
+                    positions[stream_names[idx]] = Position(timestamp=event["timestamp"], skip=1)
+
+        if state == LogState.COMPLETE:
+            return state, last_description, last_describe_job_call
+
+        if state == LogState.JOB_COMPLETE:
+            state = LogState.COMPLETE
+        elif time.time() - last_describe_job_call >= 30:
+            description = await self.describe_training_job_async(job_name)
+            last_describe_job_call = time.time()
+
+            if await sync_to_async(secondary_training_status_changed)(description, last_description):
+                self.log.info(
+                    await sync_to_async(secondary_training_status_message)(description, last_description)
+                )
+            last_description = description
+
+            status = description["TrainingJobStatus"]
+
+            if status not in self.non_terminal_states:
+                state = LogState.JOB_COMPLETE
+        return state, last_description, last_describe_job_call
+
+    async def get_multi_stream(
+        self, log_group: str, streams: list[str], positions: dict[str, Any]
+    ) -> AsyncGenerator[Any, tuple[int, Any | None]]:
+        """Iterate over the available events coming and interleaving the events from each stream so they're yielded in timestamp order.
+
+        :param log_group: The name of the log group.
+        :param streams: A list of the log stream names. The position of the stream in this list is
+            the stream number.
+        :param positions: A list of pairs of (timestamp, skip) which represents the last record
+            read from each stream.
+        """
+        positions = positions or {s: Position(timestamp=0, skip=0) for s in streams}
+        events: list[Any | None] = []
+
+        logs_hook = AwsLogsHook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)
+        event_iters = [
+            logs_hook.get_log_events_async(log_group, s, positions[s].timestamp, positions[s].skip)
+            for s in streams
+        ]
+        for event_stream in event_iters:
+            if not event_stream:
+                events.append(None)
+                continue
+
+            try:
+                events.append(await event_stream.__anext__())
+            except StopAsyncIteration:
+                events.append(None)
+
+        while any(events):
+            i = argmin(events, lambda x: x["timestamp"] if x else 9999999999) or 0
+            yield i, events[i]
+
+            try:
+                events[i] = await event_iters[i].__anext__()
+            except StopAsyncIteration:
+                events[i] = None
airflow/providers/amazon/aws/links/emr.py

@@ -17,8 +17,10 @@
 from __future__ import annotations
 
 from typing import TYPE_CHECKING, Any
+from urllib.parse import ParseResult, quote_plus, urlparse
 
 from airflow.exceptions import AirflowException
+from airflow.providers.amazon.aws.hooks.emr import EmrServerlessHook
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
 from airflow.providers.amazon.aws.links.base_aws import BASE_AWS_CONSOLE_LINK, BaseAwsLink
 from airflow.utils.helpers import exactly_one
@@ -28,7 +30,7 @@ if TYPE_CHECKING:
 
 
 class EmrClusterLink(BaseAwsLink):
-    """Helper class for constructing
+    """Helper class for constructing Amazon EMR Cluster Link."""
 
     name = "EMR Cluster"
     key = "emr_cluster"
@@ -36,7 +38,7 @@ class EmrClusterLink(BaseAwsLink):
 
 
 class EmrLogsLink(BaseAwsLink):
-    """Helper class for constructing
+    """Helper class for constructing Amazon EMR Logs Link."""
 
     name = "EMR Cluster Logs"
     key = "emr_logs"
@@ -48,6 +50,49 @@ class EmrLogsLink(BaseAwsLink):
         return super().format_link(**kwargs)
 
 
+def get_serverless_log_uri(*, s3_log_uri: str, application_id: str, job_run_id: str) -> str:
+    """
+    Retrieve the S3 URI to EMR Serverless Job logs.
+
+    Any EMR Serverless job may have a different S3 logging location (or none), which is an S3 URI.
+    The logging location is then {s3_uri}/applications/{application_id}/jobs/{job_run_id}.
+    """
+    return f"{s3_log_uri}/applications/{application_id}/jobs/{job_run_id}"
+
+
+def get_serverless_dashboard_url(
+    *,
+    aws_conn_id: str | None = None,
+    emr_serverless_client: boto3.client = None,
+    application_id: str,
+    job_run_id: str,
+) -> ParseResult | None:
+    """
+    Retrieve the URL to EMR Serverless dashboard.
+
+    The URL is a one-use, ephemeral link that expires in 1 hour and is accessible without authentication.
+
+    Either an AWS connection ID or existing EMR Serverless client must be passed.
+    If the connection ID is passed, a client is generated using that connection.
+    """
+    if not exactly_one(aws_conn_id, emr_serverless_client):
+        raise AirflowException("Requires either an AWS connection ID or an EMR Serverless Client.")
+
+    if aws_conn_id:
+        # If get_dashboard_for_job_run fails for whatever reason, fail after 1 attempt
+        # so that the rest of the links load in a reasonable time frame.
+        hook = EmrServerlessHook(aws_conn_id=aws_conn_id, config={"retries": {"total_max_attempts": 1}})
+        emr_serverless_client = hook.conn
+
+    response = emr_serverless_client.get_dashboard_for_job_run(
+        applicationId=application_id, jobRunId=job_run_id
+    )
+    if "url" not in response:
+        return None
+    log_uri = urlparse(response["url"])
+    return log_uri
+
+
 def get_log_uri(
     *, cluster: dict[str, Any] | None = None, emr_client: boto3.client = None, job_flow_id: str | None = None
 ) -> str | None:
@@ -66,3 +111,78 @@ def get_log_uri(
         return None
     log_uri = S3Hook.parse_s3_url(cluster_info["LogUri"])
     return "/".join(log_uri)
+
+
+class EmrServerlessLogsLink(BaseAwsLink):
+    """Helper class for constructing Amazon EMR Serverless link to Spark stdout logs."""
+
+    name = "Spark Driver stdout"
+    key = "emr_serverless_logs"
+
+    def format_link(self, application_id: str | None = None, job_run_id: str | None = None, **kwargs) -> str:
+        if not application_id or not job_run_id:
+            return ""
+        url = get_serverless_dashboard_url(
+            aws_conn_id=kwargs.get("conn_id"), application_id=application_id, job_run_id=job_run_id
+        )
+        if url:
+            return url._replace(path="/logs/SPARK_DRIVER/stdout.gz").geturl()
+        else:
+            return ""
+
+
+class EmrServerlessDashboardLink(BaseAwsLink):
+    """Helper class for constructing Amazon EMR Serverless Dashboard Link."""
+
+    name = "EMR Serverless Dashboard"
+    key = "emr_serverless_dashboard"
+
+    def format_link(self, application_id: str | None = None, job_run_id: str | None = None, **kwargs) -> str:
+        if not application_id or not job_run_id:
+            return ""
+        url = get_serverless_dashboard_url(
+            aws_conn_id=kwargs.get("conn_id"), application_id=application_id, job_run_id=job_run_id
+        )
+        if url:
+            return url.geturl()
+        else:
+            return ""
+
+
+class EmrServerlessS3LogsLink(BaseAwsLink):
+    """Helper class for constructing link to S3 console for Amazon EMR Serverless Logs."""
+
+    name = "S3 Logs"
+    key = "emr_serverless_s3_logs"
+    format_str = BASE_AWS_CONSOLE_LINK + (
+        "/s3/buckets/{bucket_name}?region={region_name}"
+        "&prefix={prefix}/applications/{application_id}/jobs/{job_run_id}/"
+    )
+
+    def format_link(self, **kwargs) -> str:
+        bucket, prefix = S3Hook.parse_s3_url(kwargs["log_uri"])
+        kwargs["bucket_name"] = bucket
+        kwargs["prefix"] = prefix.rstrip("/")
+        return super().format_link(**kwargs)
+
+
+class EmrServerlessCloudWatchLogsLink(BaseAwsLink):
+    """
+    Helper class for constructing link to CloudWatch console for Amazon EMR Serverless Logs.
+
+    This is a deep link that filters on a specific job run.
+    """
+
+    name = "CloudWatch Logs"
+    key = "emr_serverless_cloudwatch_logs"
+    format_str = (
+        BASE_AWS_CONSOLE_LINK
+        + "/cloudwatch/home?region={region_name}#logsV2:log-groups/log-group/{awslogs_group}{stream_prefix}"
+    )
+
+    def format_link(self, **kwargs) -> str:
+        kwargs["awslogs_group"] = quote_plus(kwargs["awslogs_group"])
+        kwargs["stream_prefix"] = quote_plus("?logStreamNameFilter=").replace("%", "$") + quote_plus(
+            kwargs["stream_prefix"]
+        )
+        return super().format_link(**kwargs)
airflow/providers/amazon/aws/log/cloudwatch_task_handler.py

@@ -17,7 +17,7 @@
 # under the License.
 from __future__ import annotations
 
-from datetime import date, datetime, timedelta
+from datetime import date, datetime, timedelta, timezone
 from functools import cached_property
 from typing import TYPE_CHECKING, Any
 
@@ -163,7 +163,7 @@ class CloudwatchTaskHandler(FileTaskHandler, LoggingMixin):
         return "\n".join(self._event_to_str(event) for event in events)
 
     def _event_to_str(self, event: dict) -> str:
-        event_dt = datetime.
+        event_dt = datetime.fromtimestamp(event["timestamp"] / 1000.0, tz=timezone.utc)
         formatted_event_dt = event_dt.strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
         message = event["message"]
         return f"[{formatted_event_dt}] {message}"
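
The handler change swaps the previous timestamp conversion for an explicitly UTC-aware one. The new expression in isolation, with an arbitrary epoch-millisecond value:

    from datetime import datetime, timezone

    event = {"timestamp": 1_700_000_000_000, "message": "example"}  # CloudWatch timestamps are epoch milliseconds
    event_dt = datetime.fromtimestamp(event["timestamp"] / 1000.0, tz=timezone.utc)
    print(event_dt.strftime("%Y-%m-%d %H:%M:%S,%f")[:-3])  # 2023-11-14 22:13:20,000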
airflow/providers/amazon/aws/operators/athena.py

@@ -26,6 +26,7 @@ from airflow.providers.amazon.aws.hooks.athena import AthenaHook
 from airflow.providers.amazon.aws.links.athena import AthenaQueryResultsLink
 from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
 from airflow.providers.amazon.aws.triggers.athena import AthenaTrigger
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
 
 if TYPE_CHECKING:
@@ -179,7 +180,9 @@ class AthenaOperator(AwsBaseOperator[AthenaHook]):
 
         return self.query_execution_id
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error while waiting for operation on cluster to complete: {event}")
         return event["value"]