apache-airflow-providers-amazon 9.5.0rc2__py3-none-any.whl → 9.6.0rc1__py3-none-any.whl
This diff compares publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
- airflow/providers/amazon/__init__.py +1 -1
- airflow/providers/amazon/aws/auth_manager/aws_auth_manager.py +13 -15
- airflow/providers/amazon/aws/auth_manager/router/login.py +4 -2
- airflow/providers/amazon/aws/executors/ecs/ecs_executor.py +53 -1
- airflow/providers/amazon/aws/hooks/athena_sql.py +2 -2
- airflow/providers/amazon/aws/hooks/batch_client.py +1 -2
- airflow/providers/amazon/aws/hooks/batch_waiters.py +11 -3
- airflow/providers/amazon/aws/hooks/dms.py +3 -1
- airflow/providers/amazon/aws/hooks/glue.py +17 -2
- airflow/providers/amazon/aws/hooks/mwaa.py +1 -1
- airflow/providers/amazon/aws/hooks/redshift_cluster.py +9 -9
- airflow/providers/amazon/aws/hooks/redshift_data.py +1 -2
- airflow/providers/amazon/aws/hooks/s3.py +0 -4
- airflow/providers/amazon/aws/hooks/sagemaker.py +1 -1
- airflow/providers/amazon/aws/links/athena.py +1 -2
- airflow/providers/amazon/aws/log/cloudwatch_task_handler.py +174 -54
- airflow/providers/amazon/aws/log/s3_task_handler.py +123 -86
- airflow/providers/amazon/aws/operators/bedrock.py +119 -0
- airflow/providers/amazon/aws/operators/ec2.py +1 -1
- airflow/providers/amazon/aws/operators/eks.py +3 -3
- airflow/providers/amazon/aws/operators/rds.py +83 -18
- airflow/providers/amazon/aws/operators/redshift_cluster.py +10 -3
- airflow/providers/amazon/aws/operators/sagemaker.py +3 -5
- airflow/providers/amazon/aws/sensors/bedrock.py +110 -0
- airflow/providers/amazon/aws/sensors/glacier.py +1 -1
- airflow/providers/amazon/aws/sensors/mwaa.py +2 -1
- airflow/providers/amazon/aws/sensors/rds.py +23 -20
- airflow/providers/amazon/aws/sensors/s3.py +1 -1
- airflow/providers/amazon/aws/sensors/step_function.py +2 -1
- airflow/providers/amazon/aws/transfers/mongo_to_s3.py +2 -2
- airflow/providers/amazon/aws/transfers/sql_to_s3.py +1 -1
- airflow/providers/amazon/aws/triggers/bedrock.py +98 -0
- airflow/providers/amazon/aws/utils/waiter_with_logging.py +9 -1
- airflow/providers/amazon/aws/waiters/bedrock.json +134 -0
- airflow/providers/amazon/get_provider_info.py +0 -124
- {apache_airflow_providers_amazon-9.5.0rc2.dist-info → apache_airflow_providers_amazon-9.6.0rc1.dist-info}/METADATA +18 -18
- {apache_airflow_providers_amazon-9.5.0rc2.dist-info → apache_airflow_providers_amazon-9.6.0rc1.dist-info}/RECORD +39 -39
- {apache_airflow_providers_amazon-9.5.0rc2.dist-info → apache_airflow_providers_amazon-9.6.0rc1.dist-info}/WHEEL +1 -1
- {apache_airflow_providers_amazon-9.5.0rc2.dist-info → apache_airflow_providers_amazon-9.6.0rc1.dist-info}/entry_points.txt +0 -0

airflow/providers/amazon/aws/log/cloudwatch_task_handler.py

@@ -17,20 +17,31 @@
 # under the License.
 from __future__ import annotations
 
+import copy
+import json
+import logging
+import os
 from datetime import date, datetime, timedelta, timezone
 from functools import cached_property
+from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
+import attrs
 import watchtower
 
 from airflow.configuration import conf
 from airflow.providers.amazon.aws.hooks.logs import AwsLogsHook
 from airflow.providers.amazon.aws.utils import datetime_to_epoch_utc_ms
+from airflow.providers.amazon.version_compat import AIRFLOW_V_3_0_PLUS
 from airflow.utils.log.file_task_handler import FileTaskHandler
 from airflow.utils.log.logging_mixin import LoggingMixin
 
 if TYPE_CHECKING:
-    from airflow.models import TaskInstance
+    import structlog.typing
+
+    from airflow.models.taskinstance import TaskInstance
+    from airflow.sdk.types import RuntimeTaskInstanceProtocol as RuntimeTI
+    from airflow.utils.log.file_task_handler import LogMessages, LogSourceInfo
 
 
 def json_serialize_legacy(value: Any) -> str | None:
@@ -62,6 +73,155 @@ def json_serialize(value: Any) -> str | None:
     return watchtower._json_serialize_default(value)
 
 
+@attrs.define(kw_only=True)
+class CloudWatchRemoteLogIO(LoggingMixin):  # noqa: D101
+    base_log_folder: Path = attrs.field(converter=Path)
+    remote_base: str = ""
+    delete_local_copy: bool = True
+
+    log_group_arn: str
+    log_stream_name: str = ""
+    log_group: str = attrs.field(init=False, repr=False)
+    region_name: str = attrs.field(init=False, repr=False)
+
+    @log_group.default
+    def _(self):
+        return self.log_group_arn.split(":")[6]
+
+    @region_name.default
+    def _(self):
+        return self.log_group_arn.split(":")[3]
+
+    @cached_property
+    def hook(self):
+        """Returns AwsLogsHook."""
+        return AwsLogsHook(
+            aws_conn_id=conf.get("logging", "remote_log_conn_id"), region_name=self.region_name
+        )
+
+    @cached_property
+    def handler(self) -> watchtower.CloudWatchLogHandler:
+        _json_serialize = conf.getimport("aws", "cloudwatch_task_handler_json_serializer", fallback=None)
+        return watchtower.CloudWatchLogHandler(
+            log_group_name=self.log_group,
+            log_stream_name=self.log_stream_name,
+            use_queues=True,
+            boto3_client=self.hook.get_conn(),
+            json_serialize_default=_json_serialize or json_serialize_legacy,
+        )
+
+    @cached_property
+    def processors(self) -> tuple[structlog.typing.Processor, ...]:
+        from logging import getLogRecordFactory
+
+        import structlog.stdlib
+
+        logRecordFactory = getLogRecordFactory()
+        # The handler MUST be initted here, before the processor is actually used to log anything.
+        # Otherwise, logging that occurs during the creation of the handler can create infinite loops.
+        _handler = self.handler
+        from airflow.sdk.log import relative_path_from_logger
+
+        def proc(logger: structlog.typing.WrappedLogger, method_name: str, event: structlog.typing.EventDict):
+            if not logger or not (stream_name := relative_path_from_logger(logger)):
+                return event
+            # Only init the handler stream_name once. We cannot do it above when we init the handler because
+            # we don't yet know the log path at that point.
+            if not _handler.log_stream_name:
+                _handler.log_stream_name = stream_name.as_posix().replace(":", "_")
+            name = event.get("logger_name") or event.get("logger", "")
+            level = structlog.stdlib.NAME_TO_LEVEL.get(method_name.lower(), logging.INFO)
+            msg = copy.copy(event)
+            created = None
+            if ts := msg.pop("timestamp", None):
+                try:
+                    created = datetime.fromisoformat(ts)
+                except Exception:
+                    pass
+            record = logRecordFactory(
+                name, level, pathname="", lineno=0, msg=msg, args=(), exc_info=None, func=None, sinfo=None
+            )
+            if created is not None:
+                ct = created.timestamp()
+                record.created = ct
+                record.msecs = int((ct - int(ct)) * 1000) + 0.0  # Copied from stdlib logging
+            _handler.handle(record)
+            return event
+
+        return (proc,)
+
+    def close(self):
+        self.handler.close()
+
+    def upload(self, path: os.PathLike | str, ti: RuntimeTI):
+        # No-op, as we upload via the processor as we go
+        # But we need to give the handler time to finish off its business
+        self.close()
+        return
+
+    def read(self, relative_path, ti: RuntimeTI) -> tuple[LogSourceInfo, LogMessages | None]:
+        logs: LogMessages | None = []
+        messages = [
+            f"Reading remote log from Cloudwatch log_group: {self.log_group} log_stream: {relative_path}"
+        ]
+        try:
+            if AIRFLOW_V_3_0_PLUS:
+                from airflow.utils.log.file_task_handler import StructuredLogMessage
+
+                logs = [
+                    StructuredLogMessage.model_validate(log)
+                    for log in self.get_cloudwatch_logs(relative_path, ti)
+                ]
+            else:
+                logs = [self.get_cloudwatch_logs(relative_path, ti)]  # type: ignore[arg-value]
+        except Exception as e:
+            logs = None
+            messages.append(str(e))
+
+        return messages, logs
+
+    def get_cloudwatch_logs(self, stream_name: str, task_instance: RuntimeTI):
+        """
+        Return all logs from the given log stream.
+
+        :param stream_name: name of the Cloudwatch log stream to get all logs from
+        :param task_instance: the task instance to get logs about
+        :return: string of all logs from the given log stream
+        """
+        stream_name = stream_name.replace(":", "_")
+        # If there is an end_date to the task instance, fetch logs until that date + 30 seconds
+        # 30 seconds is an arbitrary buffer so that we don't miss any logs that were emitted
+        end_time = (
+            None
+            if (end_date := getattr(task_instance, "end_date", None)) is None
+            else datetime_to_epoch_utc_ms(end_date + timedelta(seconds=30))
+        )
+        events = self.hook.get_log_events(
+            log_group=self.log_group,
+            log_stream_name=stream_name,
+            end_time=end_time,
+        )
+        if AIRFLOW_V_3_0_PLUS:
+            return list(self._event_to_dict(e) for e in events)
+        return "\n".join(self._event_to_str(event) for event in events)
+
+    def _event_to_dict(self, event: dict) -> dict:
+        event_dt = datetime.fromtimestamp(event["timestamp"] / 1000.0, tz=timezone.utc).isoformat()
+        message = event["message"]
+        try:
+            message = json.loads(message)
+            message["timestamp"] = event_dt
+            return message
+        except Exception:
+            return {"timestamp": event_dt, "event": message}
+
+    def _event_to_str(self, event: dict) -> str:
+        event_dt = datetime.fromtimestamp(event["timestamp"] / 1000.0, tz=timezone.utc)
+        formatted_event_dt = event_dt.strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
+        message = event["message"]
+        return f"[{formatted_event_dt}] {message}"
+
+
 class CloudwatchTaskHandler(FileTaskHandler, LoggingMixin):
     """
     CloudwatchTaskHandler is a python log handler that handles and reads task instance logs.
@@ -84,6 +244,11 @@ class CloudwatchTaskHandler(FileTaskHandler, LoggingMixin):
         self.region_name = split_arn[3]
         self.closed = False
 
+        self.io = CloudWatchRemoteLogIO(
+            base_log_folder=base_log_folder,
+            log_group_arn=log_group_arn,
+        )
+
     @cached_property
     def hook(self):
         """Returns AwsLogsHook."""
@@ -97,14 +262,9 @@ class CloudwatchTaskHandler(FileTaskHandler, LoggingMixin):
 
     def set_context(self, ti: TaskInstance, *, identifier: str | None = None):
         super().set_context(ti)
-        _json_serialize = conf.getimport("aws", "cloudwatch_task_handler_json_serializer", fallback=None)
-        self.handler = watchtower.CloudWatchLogHandler(
-            log_group_name=self.log_group,
-            log_stream_name=self._render_filename(ti, ti.try_number),
-            use_queues=not getattr(ti, "is_trigger_log_context", False),
-            boto3_client=self.hook.get_conn(),
-            json_serialize_default=_json_serialize or json_serialize_legacy,
-        )
+        self.io.log_stream_name = self._render_filename(ti, ti.try_number)
+
+        self.handler = self.io.handler
 
     def close(self):
         """Close the handler responsible for the upload of the local log file to Cloudwatch."""
@@ -120,49 +280,9 @@ class CloudwatchTaskHandler(FileTaskHandler, LoggingMixin):
         # Mark closed so we don't double write if close is called twice
         self.closed = True
 
-    def _read(self, task_instance, try_number, metadata=None):
+    def _read_remote_logs(
+        self, task_instance, try_number, metadata=None
+    ) -> tuple[LogSourceInfo, LogMessages]:
         stream_name = self._render_filename(task_instance, try_number)
-        try:
-            return (
-                f"*** Reading remote log from Cloudwatch log_group: {self.log_group} "
-                f"log_stream: {stream_name}.\n"
-                f"{self.get_cloudwatch_logs(stream_name=stream_name, task_instance=task_instance)}\n",
-                {"end_of_log": True},
-            )
-        except Exception as e:
-            log = (
-                f"*** Unable to read remote logs from Cloudwatch (log_group: {self.log_group}, log_stream: "
-                f"{stream_name})\n*** {e}\n\n"
-            )
-            self.log.error(log)
-            local_log, metadata = super()._read(task_instance, try_number, metadata)
-            log += local_log
-            return log, metadata
-
-    def get_cloudwatch_logs(self, stream_name: str, task_instance: TaskInstance) -> str:
-        """
-        Return all logs from the given log stream.
-
-        :param stream_name: name of the Cloudwatch log stream to get all logs from
-        :param task_instance: the task instance to get logs about
-        :return: string of all logs from the given log stream
-        """
-        # If there is an end_date to the task instance, fetch logs until that date + 30 seconds
-        # 30 seconds is an arbitrary buffer so that we don't miss any logs that were emitted
-        end_time = (
-            None
-            if task_instance.end_date is None
-            else datetime_to_epoch_utc_ms(task_instance.end_date + timedelta(seconds=30))
-        )
-        events = self.hook.get_log_events(
-            log_group=self.log_group,
-            log_stream_name=stream_name,
-            end_time=end_time,
-        )
-        return "\n".join(self._event_to_str(event) for event in events)
-
-    def _event_to_str(self, event: dict) -> str:
-        event_dt = datetime.fromtimestamp(event["timestamp"] / 1000.0, tz=timezone.utc)
-        formatted_event_dt = event_dt.strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
-        message = event["message"]
-        return f"[{formatted_event_dt}] {message}"
+        messages, logs = self.io.read(stream_name, task_instance)
+        return messages, logs or []
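
The new CloudWatchRemoteLogIO above derives its log group and region from the log group ARN and only touches AWS lazily through its cached `hook` and `handler` properties. A minimal sketch of that behaviour, assuming the 9.6.0rc1 wheel is installed; the ARN, account id, and base_log_folder are placeholders:

```python
# Hedged sketch: exercising the ARN parsing of CloudWatchRemoteLogIO shown in the diff above.
# The log group ARN, account id, and base_log_folder are hypothetical placeholders.
from airflow.providers.amazon.aws.log.cloudwatch_task_handler import CloudWatchRemoteLogIO

io = CloudWatchRemoteLogIO(
    base_log_folder="/opt/airflow/logs",
    log_group_arn="arn:aws:logs:us-east-1:123456789012:log-group:airflow-task-logs",
)

# log_group and region_name are attrs defaults computed by splitting the ARN on ":".
assert io.log_group == "airflow-task-logs"  # ARN field 6
assert io.region_name == "us-east-1"        # ARN field 3

# hook and handler are cached properties, so no AWS call happens until one of them
# (or upload()/read()) is actually used.
```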

airflow/providers/amazon/aws/log/s3_task_handler.py

@@ -24,6 +24,8 @@ import shutil
 from functools import cached_property
 from typing import TYPE_CHECKING
 
+import attrs
+
 from airflow.configuration import conf
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
 from airflow.providers.amazon.version_compat import AIRFLOW_V_3_0_PLUS
@@ -32,28 +34,34 @@ from airflow.utils.log.logging_mixin import LoggingMixin
 
 if TYPE_CHECKING:
     from airflow.models.taskinstance import TaskInstance
+    from airflow.sdk.types import RuntimeTaskInstanceProtocol as RuntimeTI
+    from airflow.utils.log.file_task_handler import LogMessages, LogSourceInfo
 
 
-class S3TaskHandler(FileTaskHandler, LoggingMixin):
-    """
-    S3TaskHandler is a python log handler that handles and reads task instance logs.
+@attrs.define
+class S3RemoteLogIO(LoggingMixin):  # noqa: D101
+    remote_base: str
+    base_log_folder: pathlib.Path = attrs.field(converter=pathlib.Path)
+    delete_local_copy: bool
 
-    It extends airflow FileTaskHandler and uploads to and reads from S3 remote storage.
-    """
+    processors = ()
 
-    trigger_should_wrap = True
+    def upload(self, path: os.PathLike | str, ti: RuntimeTI):
+        """Upload the given log path to the remote storage."""
+        path = pathlib.Path(path)
+        if path.is_absolute():
+            local_loc = path
+            remote_loc = os.path.join(self.remote_base, path.relative_to(self.base_log_folder))
+        else:
+            local_loc = self.base_log_folder.joinpath(path)
+            remote_loc = os.path.join(self.remote_base, path)
 
-    def __init__(self, base_log_folder: str, s3_log_folder: str, **kwargs):
-        super().__init__(base_log_folder)
-        self.handler: logging.FileHandler | None = None
-        self.remote_base = s3_log_folder
-        self.log_relative_path = ""
-        self._hook = None
-        self.closed = False
-        self.upload_on_close = True
-        self.delete_local_copy = kwargs.get(
-            "delete_local_copy", conf.getboolean("logging", "delete_local_logs")
-        )
+        if local_loc.is_file():
+            # read log and remove old logs to get just the latest additions
+            log = local_loc.read_text()
+            has_uploaded = self.write(log, remote_loc)
+            if has_uploaded and self.delete_local_copy:
+                shutil.rmtree(os.path.dirname(local_loc))
 
     @cached_property
     def hook(self):
@@ -63,73 +71,6 @@ class S3TaskHandler(FileTaskHandler, LoggingMixin):
             transfer_config_args={"use_threads": False},
         )
 
-    def set_context(self, ti: TaskInstance, *, identifier: str | None = None) -> None:
-        super().set_context(ti, identifier=identifier)
-        # Local location and remote location is needed to open and
-        # upload local log file to S3 remote storage.
-        if TYPE_CHECKING:
-            assert self.handler is not None
-
-        full_path = self.handler.baseFilename
-        self.log_relative_path = pathlib.Path(full_path).relative_to(self.local_base).as_posix()
-        is_trigger_log_context = getattr(ti, "is_trigger_log_context", False)
-        self.upload_on_close = is_trigger_log_context or not getattr(ti, "raw", None)
-        # Clear the file first so that duplicate data is not uploaded
-        # when reusing the same path (e.g. with rescheduled sensors)
-        if self.upload_on_close:
-            with open(self.handler.baseFilename, "w"):
-                pass
-
-    def close(self):
-        """Close and upload local log file to remote storage S3."""
-        # When application exit, system shuts down all handlers by
-        # calling close method. Here we check if logger is already
-        # closed to prevent uploading the log to remote storage multiple
-        # times when `logging.shutdown` is called.
-        if self.closed:
-            return
-
-        super().close()
-
-        if not self.upload_on_close:
-            return
-
-        local_loc = os.path.join(self.local_base, self.log_relative_path)
-        remote_loc = os.path.join(self.remote_base, self.log_relative_path)
-        if os.path.exists(local_loc):
-            # read log and remove old logs to get just the latest additions
-            log = pathlib.Path(local_loc).read_text()
-            write_to_s3 = self.s3_write(log, remote_loc)
-            if write_to_s3 and self.delete_local_copy:
-                shutil.rmtree(os.path.dirname(local_loc))
-
-        # Mark closed so we don't double write if close is called twice
-        self.closed = True
-
-    def _read_remote_logs(self, ti, try_number, metadata=None) -> tuple[list[str], list[str]]:
-        # Explicitly getting log relative path is necessary as the given
-        # task instance might be different than task instance passed in
-        # in set_context method.
-        worker_log_rel_path = self._render_filename(ti, try_number)
-
-        logs = []
-        messages = []
-        bucket, prefix = self.hook.parse_s3_url(s3url=os.path.join(self.remote_base, worker_log_rel_path))
-        keys = self.hook.list_keys(bucket_name=bucket, prefix=prefix)
-        if keys:
-            keys = sorted(f"s3://{bucket}/{key}" for key in keys)
-            if AIRFLOW_V_3_0_PLUS:
-                messages = keys
-            else:
-                messages.append("Found logs in s3:")
-                messages.extend(f" * {key}" for key in keys)
-            for key in keys:
-                logs.append(self.s3_read(key, return_error=True))
-        else:
-            if not AIRFLOW_V_3_0_PLUS:
-                messages.append(f"No logs found on s3 for ti={ti}")
-        return messages, logs
-
     def s3_log_exists(self, remote_log_location: str) -> bool:
         """
         Check if remote_log_location exists in remote storage.
@@ -158,7 +99,7 @@ class S3TaskHandler(FileTaskHandler, LoggingMixin):
             return msg
         return ""
 
-    def s3_write(
+    def write(
         self,
         log: str,
         remote_log_location: str,
@@ -168,7 +109,7 @@ class S3TaskHandler(FileTaskHandler, LoggingMixin):
         """
        Write the log to the remote_log_location; return `True` or fails silently and return `False`.
 
-        :param log: the log to write to the remote_log_location
+        :param log: the contents to write to the remote_log_location
        :param remote_log_location: the log's location in remote storage
        :param append: if False, any existing log file is overwritten. If True,
            the new log is appended to any existing logs.
@@ -205,3 +146,99 @@ class S3TaskHandler(FileTaskHandler, LoggingMixin):
             self.log.exception("Could not write logs to %s", remote_log_location)
             return False
         return True
+
+    def read(self, relative_path: str, ti: RuntimeTI) -> tuple[LogSourceInfo, LogMessages | None]:
+        logs: list[str] = []
+        messages = []
+        bucket, prefix = self.hook.parse_s3_url(s3url=os.path.join(self.remote_base, relative_path))
+        keys = self.hook.list_keys(bucket_name=bucket, prefix=prefix)
+        if keys:
+            keys = sorted(f"s3://{bucket}/{key}" for key in keys)
+            if AIRFLOW_V_3_0_PLUS:
+                messages = keys
+            else:
+                messages.append("Found logs in s3:")
+                messages.extend(f" * {key}" for key in keys)
+            for key in keys:
+                logs.append(self.s3_read(key, return_error=True))
+            return messages, logs
+        else:
+            return messages, None
+
+
+class S3TaskHandler(FileTaskHandler, LoggingMixin):
+    """
+    S3TaskHandler is a python log handler that handles and reads task instance logs.
+
+    It extends airflow FileTaskHandler and uploads to and reads from S3 remote storage.
+    """
+
+    def __init__(self, base_log_folder: str, s3_log_folder: str, **kwargs):
+        super().__init__(base_log_folder)
+        self.handler: logging.FileHandler | None = None
+        self.remote_base = s3_log_folder
+        self.log_relative_path = ""
+        self._hook = None
+        self.closed = False
+        self.upload_on_close = True
+        self.io = S3RemoteLogIO(
+            remote_base=s3_log_folder,
+            base_log_folder=base_log_folder,
+            delete_local_copy=kwargs.get(
+                "delete_local_copy", conf.getboolean("logging", "delete_local_logs")
+            ),
+        )
+
+    def set_context(self, ti: TaskInstance, *, identifier: str | None = None) -> None:
+        super().set_context(ti, identifier=identifier)
+        # Local location and remote location is needed to open and
+        # upload local log file to S3 remote storage.
+        if TYPE_CHECKING:
+            assert self.handler is not None
+
+        self.ti = ti
+
+        full_path = self.handler.baseFilename
+        self.log_relative_path = pathlib.Path(full_path).relative_to(self.local_base).as_posix()
+        is_trigger_log_context = getattr(ti, "is_trigger_log_context", False)
+        self.upload_on_close = is_trigger_log_context or not getattr(ti, "raw", None)
+        # Clear the file first so that duplicate data is not uploaded
+        # when reusing the same path (e.g. with rescheduled sensors)
+        if self.upload_on_close:
+            with open(self.handler.baseFilename, "w"):
+                pass
+
+    def close(self):
+        """Close and upload local log file to remote storage S3."""
+        # When application exit, system shuts down all handlers by
+        # calling close method. Here we check if logger is already
+        # closed to prevent uploading the log to remote storage multiple
+        # times when `logging.shutdown` is called.
+        if self.closed:
+            return
+
+        super().close()
+
+        if not self.upload_on_close:
+            return
+
+        if hasattr(self, "ti"):
+            self.io.upload(self.log_relative_path, self.ti)
+
+        # Mark closed so we don't double write if close is called twice
+        self.closed = True
+
+    def _read_remote_logs(self, ti, try_number, metadata=None) -> tuple[LogSourceInfo, LogMessages]:
+        # Explicitly getting log relative path is necessary as the given
+        # task instance might be different than task instance passed in
+        # in set_context method.
+        worker_log_rel_path = self._render_filename(ti, try_number)
+
+        messages, logs = self.io.read(worker_log_rel_path, ti)
+
+        if logs is None:
+            logs = []
+            if not AIRFLOW_V_3_0_PLUS:
+                messages.append(f"No logs found on s3 for ti={ti}")
+
+        return messages, logs
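
For reference, a small sketch of how the new S3RemoteLogIO.upload() resolves local and remote locations, following the logic in the diff above; the bucket, prefix, and paths are placeholders:

```python
# Hedged sketch of the path resolution in S3RemoteLogIO.upload(); all names are placeholders.
from airflow.providers.amazon.aws.log.s3_task_handler import S3RemoteLogIO

io = S3RemoteLogIO(
    remote_base="s3://my-airflow-logs/dag-logs",
    base_log_folder="/opt/airflow/logs",
    delete_local_copy=False,
)

# A relative path is resolved against both bases:
#   local:  /opt/airflow/logs/dag_id=demo/run_id=manual/task_id=t1/attempt=1.log
#   remote: s3://my-airflow-logs/dag-logs/dag_id=demo/run_id=manual/task_id=t1/attempt=1.log
# An absolute path is used as-is locally and made relative to base_log_folder for the S3 key.
# io.upload("dag_id=demo/run_id=manual/task_id=t1/attempt=1.log", ti)  # ti: the runtime task instance
```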

airflow/providers/amazon/aws/operators/bedrock.py

@@ -33,6 +33,7 @@ from airflow.providers.amazon.aws.hooks.bedrock import (
 )
 from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
 from airflow.providers.amazon.aws.triggers.bedrock import (
+    BedrockBatchInferenceCompletedTrigger,
     BedrockCustomizeModelCompletedTrigger,
     BedrockIngestionJobTrigger,
     BedrockKnowledgeBaseActiveTrigger,
@@ -869,3 +870,121 @@ class BedrockRetrieveOperator(AwsBaseOperator[BedrockAgentRuntimeHook]):
 
         self.log.info("\nQuery: %s\nRetrieved: %s", self.retrieval_query, result["retrievalResults"])
         return result
+
+
+class BedrockBatchInferenceOperator(AwsBaseOperator[BedrockHook]):
+    """
+    Create a batch inference job to invoke a model on multiple prompts.
+
+    .. seealso::
+        For more information on how to use this operator, take a look at the guide:
+        :ref:`howto/operator:BedrockBatchInferenceOperator`
+
+    :param job_name: A name to give the batch inference job. (templated)
+    :param role_arn: The ARN of the IAM role with permissions to create the knowledge base. (templated)
+    :param model_id: Name or ARN of the model to associate with this provisioned throughput. (templated)
+    :param input_uri: The S3 location of the input data. (templated)
+    :param output_uri: The S3 location of the output data. (templated)
+    :param invoke_kwargs: Additional keyword arguments to pass to the API call. (templated)
+
+    :param wait_for_completion: Whether to wait for cluster to stop. (default: True)
+        NOTE: The way batch inference jobs work, your jobs are added to a queue and done "eventually"
+        so using deferrable mode is much more practical than using wait_for_completion.
+    :param waiter_delay: Time in seconds to wait between status checks. (default: 60)
+    :param waiter_max_attempts: Maximum number of attempts to check for job completion. (default: 10)
+    :param deferrable: If True, the operator will wait asynchronously for the cluster to stop.
+        This implies waiting for completion. This mode requires aiobotocore module to be installed.
+        (default: False)
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether or not to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
+    """
+
+    aws_hook_class = BedrockHook
+    template_fields: Sequence[str] = aws_template_fields(
+        "job_name",
+        "role_arn",
+        "model_id",
+        "input_uri",
+        "output_uri",
+        "invoke_kwargs",
+    )
+
+    def __init__(
+        self,
+        job_name: str,
+        role_arn: str,
+        model_id: str,
+        input_uri: str,
+        output_uri: str,
+        invoke_kwargs: dict[str, Any] | None = None,
+        wait_for_completion: bool = True,
+        waiter_delay: int = 60,
+        waiter_max_attempts: int = 10,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.job_name = job_name
+        self.role_arn = role_arn
+        self.model_id = model_id
+        self.input_uri = input_uri
+        self.output_uri = output_uri
+        self.invoke_kwargs = invoke_kwargs or {}
+
+        self.wait_for_completion = wait_for_completion
+        self.waiter_delay = waiter_delay
+        self.waiter_max_attempts = waiter_max_attempts
+        self.deferrable = deferrable
+
+        self.activity = "Bedrock batch inference job"
+
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        validated_event = validate_execute_complete_event(event)
+
+        if validated_event["status"] != "success":
+            raise AirflowException(f"Error while running {self.activity}: {validated_event}")
+
+        self.log.info("%s '%s' complete.", self.activity, validated_event["job_arn"])
+
+        return validated_event["job_arn"]
+
+    def execute(self, context: Context) -> str:
+        response = self.hook.conn.create_model_invocation_job(
+            jobName=self.job_name,
+            roleArn=self.role_arn,
+            modelId=self.model_id,
+            inputDataConfig={"s3InputDataConfig": {"s3Uri": self.input_uri}},
+            outputDataConfig={"s3OutputDataConfig": {"s3Uri": self.output_uri}},
+            **self.invoke_kwargs,
+        )
+        job_arn = response["jobArn"]
+        self.log.info("%s '%s' started with ARN: %s", self.activity, self.job_name, job_arn)
+
+        task_description = f"for {self.activity} '{self.job_name}' to complete."
+        if self.deferrable:
+            self.log.info("Deferring %s", task_description)
+            self.defer(
+                trigger=BedrockBatchInferenceCompletedTrigger(
+                    job_arn=job_arn,
+                    waiter_delay=self.waiter_delay,
+                    waiter_max_attempts=self.waiter_max_attempts,
+                    aws_conn_id=self.aws_conn_id,
+                ),
+                method_name="execute_complete",
+            )
+        elif self.wait_for_completion:
+            self.log.info("Waiting %s", task_description)
+            self.hook.get_waiter(waiter_name="batch_inference_complete").wait(
+                jobIdentifier=job_arn,
+                WaiterConfig={"Delay": self.waiter_delay, "MaxAttempts": self.waiter_max_attempts},
+            )
+
+        return job_arn
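
A hedged usage sketch for the new BedrockBatchInferenceOperator; the DAG id, role ARN, model id, and S3 URIs are placeholders, and deferrable mode assumes aiobotocore is installed:

```python
from datetime import datetime

from airflow import DAG
from airflow.providers.amazon.aws.operators.bedrock import BedrockBatchInferenceOperator

with DAG(dag_id="bedrock_batch_inference_demo", start_date=datetime(2025, 1, 1), schedule=None):
    run_batch_inference = BedrockBatchInferenceOperator(
        task_id="run_batch_inference",
        job_name="claude-batch-{{ ds_nodash }}",  # job_name is a templated field
        role_arn="arn:aws:iam::123456789012:role/BedrockBatchRole",
        model_id="anthropic.claude-3-5-sonnet-20240620-v1:0",
        input_uri="s3://my-bucket/bedrock/input.jsonl",
        output_uri="s3://my-bucket/bedrock/output/",
        # Batch jobs are queued and finish "eventually", so deferring is usually
        # preferable to blocking a worker with wait_for_completion.
        deferrable=True,
    )
```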

airflow/providers/amazon/aws/operators/ec2.py

@@ -254,7 +254,7 @@ class EC2CreateInstanceOperator(AwsBaseOperator[EC2Hook]):
             region_name=self.region_name,
             api_type="client_type",
         ) """
-        self.hook.terminate_instances(
+        self.hook.terminate_instances(instance_ids=instance_ids)
         super().on_kill()
 
 
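
For context, a hedged sketch of the hook call that on_kill now makes, issued directly against EC2Hook; the connection id, region, and instance id are placeholders:

```python
# Hedged sketch of the EC2Hook call used by on_kill above; all identifiers are placeholders.
from airflow.providers.amazon.aws.hooks.ec2 import EC2Hook

hook = EC2Hook(aws_conn_id="aws_default", region_name="us-east-1", api_type="client_type")
hook.terminate_instances(instance_ids=["i-0123456789abcdef0"])
```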