apache-airflow-providers-amazon 9.15.0__py3-none-any.whl → 9.18.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. airflow/providers/amazon/__init__.py +3 -3
  2. airflow/providers/amazon/aws/auth_manager/routes/login.py +7 -1
  3. airflow/providers/amazon/aws/executors/aws_lambda/docker/app.py +5 -1
  4. airflow/providers/amazon/aws/hooks/athena.py +6 -2
  5. airflow/providers/amazon/aws/hooks/athena_sql.py +2 -2
  6. airflow/providers/amazon/aws/hooks/base_aws.py +2 -2
  7. airflow/providers/amazon/aws/hooks/batch_client.py +4 -6
  8. airflow/providers/amazon/aws/hooks/batch_waiters.py +0 -1
  9. airflow/providers/amazon/aws/hooks/chime.py +1 -1
  10. airflow/providers/amazon/aws/hooks/datasync.py +3 -3
  11. airflow/providers/amazon/aws/hooks/firehose.py +56 -0
  12. airflow/providers/amazon/aws/hooks/glue.py +7 -1
  13. airflow/providers/amazon/aws/hooks/kinesis.py +31 -13
  14. airflow/providers/amazon/aws/hooks/mwaa.py +38 -7
  15. airflow/providers/amazon/aws/hooks/redshift_sql.py +20 -6
  16. airflow/providers/amazon/aws/hooks/s3.py +12 -5
  17. airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py +1 -1
  18. airflow/providers/amazon/aws/hooks/ssm.py +34 -6
  19. airflow/providers/amazon/aws/hooks/step_function.py +1 -1
  20. airflow/providers/amazon/aws/links/base_aws.py +1 -1
  21. airflow/providers/amazon/aws/operators/base_aws.py +2 -2
  22. airflow/providers/amazon/aws/operators/bedrock.py +2 -0
  23. airflow/providers/amazon/aws/operators/cloud_formation.py +2 -2
  24. airflow/providers/amazon/aws/operators/datasync.py +2 -1
  25. airflow/providers/amazon/aws/operators/emr.py +20 -11
  26. airflow/providers/amazon/aws/operators/mwaa.py +12 -3
  27. airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py +1 -1
  28. airflow/providers/amazon/aws/operators/ssm.py +122 -17
  29. airflow/providers/amazon/aws/secrets/secrets_manager.py +3 -4
  30. airflow/providers/amazon/aws/sensors/base_aws.py +2 -2
  31. airflow/providers/amazon/aws/sensors/mwaa.py +14 -1
  32. airflow/providers/amazon/aws/sensors/s3.py +3 -2
  33. airflow/providers/amazon/aws/sensors/sagemaker_unified_studio.py +1 -1
  34. airflow/providers/amazon/aws/sensors/ssm.py +33 -17
  35. airflow/providers/amazon/aws/transfers/azure_blob_to_s3.py +3 -3
  36. airflow/providers/amazon/aws/transfers/base.py +5 -5
  37. airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py +4 -4
  38. airflow/providers/amazon/aws/transfers/exasol_to_s3.py +1 -1
  39. airflow/providers/amazon/aws/transfers/ftp_to_s3.py +1 -1
  40. airflow/providers/amazon/aws/transfers/gcs_to_s3.py +48 -5
  41. airflow/providers/amazon/aws/transfers/glacier_to_gcs.py +1 -1
  42. airflow/providers/amazon/aws/transfers/google_api_to_s3.py +2 -5
  43. airflow/providers/amazon/aws/transfers/hive_to_dynamodb.py +1 -1
  44. airflow/providers/amazon/aws/transfers/http_to_s3.py +1 -1
  45. airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py +1 -1
  46. airflow/providers/amazon/aws/transfers/local_to_s3.py +1 -1
  47. airflow/providers/amazon/aws/transfers/mongo_to_s3.py +1 -1
  48. airflow/providers/amazon/aws/transfers/redshift_to_s3.py +6 -6
  49. airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py +1 -1
  50. airflow/providers/amazon/aws/transfers/s3_to_ftp.py +1 -1
  51. airflow/providers/amazon/aws/transfers/s3_to_redshift.py +6 -6
  52. airflow/providers/amazon/aws/transfers/s3_to_sftp.py +1 -1
  53. airflow/providers/amazon/aws/transfers/s3_to_sql.py +1 -1
  54. airflow/providers/amazon/aws/transfers/salesforce_to_s3.py +1 -1
  55. airflow/providers/amazon/aws/transfers/sftp_to_s3.py +1 -1
  56. airflow/providers/amazon/aws/transfers/sql_to_s3.py +4 -5
  57. airflow/providers/amazon/aws/triggers/bedrock.py +1 -1
  58. airflow/providers/amazon/aws/triggers/s3.py +29 -2
  59. airflow/providers/amazon/aws/triggers/ssm.py +17 -1
  60. airflow/providers/amazon/aws/utils/connection_wrapper.py +2 -5
  61. airflow/providers/amazon/aws/utils/mixins.py +1 -1
  62. airflow/providers/amazon/aws/utils/waiter.py +2 -2
  63. airflow/providers/amazon/aws/waiters/emr.json +6 -6
  64. airflow/providers/amazon/get_provider_info.py +19 -1
  65. airflow/providers/amazon/version_compat.py +19 -16
  66. {apache_airflow_providers_amazon-9.15.0.dist-info → apache_airflow_providers_amazon-9.18.0rc2.dist-info}/METADATA +25 -19
  67. {apache_airflow_providers_amazon-9.15.0.dist-info → apache_airflow_providers_amazon-9.18.0rc2.dist-info}/RECORD +71 -69
  68. apache_airflow_providers_amazon-9.18.0rc2.dist-info/licenses/NOTICE +5 -0
  69. {apache_airflow_providers_amazon-9.15.0.dist-info → apache_airflow_providers_amazon-9.18.0rc2.dist-info}/WHEEL +0 -0
  70. {apache_airflow_providers_amazon-9.15.0.dist-info → apache_airflow_providers_amazon-9.18.0rc2.dist-info}/entry_points.txt +0 -0
  71. {airflow/providers/amazon → apache_airflow_providers_amazon-9.18.0rc2.dist-info/licenses}/LICENSE +0 -0
airflow/providers/amazon/aws/transfers/exasol_to_s3.py
@@ -24,7 +24,7 @@ from tempfile import NamedTemporaryFile
  from typing import TYPE_CHECKING

  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator
  from airflow.providers.exasol.hooks.exasol import ExasolHook

  if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/ftp_to_s3.py
@@ -22,7 +22,7 @@ from tempfile import NamedTemporaryFile
  from typing import TYPE_CHECKING

  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator
  from airflow.providers.ftp.hooks.ftp import FTPHook

  if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/gcs_to_s3.py
@@ -27,7 +27,7 @@ from packaging.version import Version

  from airflow.exceptions import AirflowException
  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator
  from airflow.providers.google.cloud.hooks.gcs import GCSHook

  if TYPE_CHECKING:
@@ -39,6 +39,11 @@ class GCSToS3Operator(BaseOperator):
  """
  Synchronizes a Google Cloud Storage bucket with an S3 bucket.

+ .. note::
+ When flatten_structure=True, it takes precedence over keep_directory_structure.
+ For example, with flatten_structure=True, "folder/subfolder/file.txt" becomes "file.txt"
+ regardless of the keep_directory_structure setting.
+
  .. seealso::
  For more information on how to use this operator, take a look at the guide:
  :ref:`howto/operator:GCSToS3Operator`
@@ -79,6 +84,9 @@ class GCSToS3Operator(BaseOperator):
  object to be uploaded in S3
  :param keep_directory_structure: (Optional) When set to False the path of the file
  on the bucket is recreated within path passed in dest_s3_key.
+ :param flatten_structure: (Optional) When set to True, places all files directly
+ in the dest_s3_key directory without preserving subdirectory structure.
+ Takes precedence over keep_directory_structure when enabled.
  :param match_glob: (Optional) filters objects based on the glob pattern given by the string
  (e.g, ``'**/*/.json'``)
  :param gcp_user_project: (Optional) The identifier of the Google Cloud project to bill for this request.
@@ -108,6 +116,7 @@ class GCSToS3Operator(BaseOperator):
  dest_s3_extra_args: dict | None = None,
  s3_acl_policy: str | None = None,
  keep_directory_structure: bool = True,
+ flatten_structure: bool = False,
  match_glob: str | None = None,
  gcp_user_project: str | None = None,
  **kwargs,
@@ -124,6 +133,10 @@ class GCSToS3Operator(BaseOperator):
  self.dest_s3_extra_args = dest_s3_extra_args or {}
  self.s3_acl_policy = s3_acl_policy
  self.keep_directory_structure = keep_directory_structure
+ self.flatten_structure = flatten_structure
+
+ if self.flatten_structure and self.keep_directory_structure:
+ self.log.warning("flatten_structure=True takes precedence over keep_directory_structure=True")
  try:
  from airflow.providers.google import __version__ as _GOOGLE_PROVIDER_VERSION

@@ -140,6 +153,17 @@
  self.match_glob = match_glob
  self.gcp_user_project = gcp_user_project

+ def _transform_file_path(self, file_path: str) -> str:
+ """
+ Transform the GCS file path according to the specified options.
+
+ :param file_path: The original GCS file path
+ :return: The transformed file path for S3 destination
+ """
+ if self.flatten_structure:
+ return os.path.basename(file_path)
+ return file_path
+
  def execute(self, context: Context) -> list[str]:
  # list all files in an Google Cloud Storage bucket
  gcs_hook = GCSHook(
@@ -167,7 +191,7 @@
  aws_conn_id=self.dest_aws_conn_id, verify=self.dest_verify, extra_args=self.dest_s3_extra_args
  )

- if not self.keep_directory_structure and self.prefix:
+ if not self.keep_directory_structure and self.prefix and not self.flatten_structure:
  self.dest_s3_key = os.path.join(self.dest_s3_key, self.prefix)

  if not self.replace:
@@ -187,15 +211,34 @@
  existing_files = existing_files or []
  # remove the prefix for the existing files to allow the match
  existing_files = [file.replace(prefix, "", 1) for file in existing_files]
- gcs_files = list(set(gcs_files) - set(existing_files))
+
+ # Transform GCS files for comparison and filter out existing ones
+ existing_files_set = set(existing_files)
+ filtered_files = []
+ seen_transformed = set()
+
+ for file in gcs_files:
+ transformed = self._transform_file_path(file)
+ if transformed not in existing_files_set and transformed not in seen_transformed:
+ filtered_files.append(file)
+ seen_transformed.add(transformed)
+ elif transformed in seen_transformed:
+ self.log.warning(
+ "Skipping duplicate file %s (transforms to %s)",
+ file,
+ transformed,
+ )
+
+ gcs_files = filtered_files

  if gcs_files:
  for file in gcs_files:
  with gcs_hook.provide_file(
  object_name=file, bucket_name=str(self.gcs_bucket), user_project=self.gcp_user_project
  ) as local_tmp_file:
- dest_key = os.path.join(self.dest_s3_key, file)
- self.log.info("Saving file to %s", dest_key)
+ transformed_path = self._transform_file_path(file)
+ dest_key = os.path.join(self.dest_s3_key, transformed_path)
+ self.log.info("Saving file from %s to %s", file, dest_key)
  s3_hook.load_file(
  filename=local_tmp_file.name,
  key=dest_key,
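The flatten_structure flag above only changes how destination keys are built; listing, replace handling, and upload still go through the same code path. A minimal usage sketch, assuming placeholder bucket names and task wiring (everything else matches the operator as shipped):

    # Hedged usage sketch: bucket names, prefix, and task id are placeholders.
    from airflow.providers.amazon.aws.transfers.gcs_to_s3 import GCSToS3Operator

    flatten_copy = GCSToS3Operator(
        task_id="gcs_to_s3_flattened",
        gcs_bucket="example-gcs-bucket",
        prefix="exports/2024",
        dest_s3_key="s3://example-s3-bucket/landing/",
        flatten_structure=True,  # "exports/2024/a/b/report.csv" is written as "landing/report.csv"
        replace=False,           # existing keys are skipped before downloading
    )

Note that with flatten_structure=True, two GCS objects sharing a basename collapse to the same S3 key; per the filter loop above, the first one is copied and the rest are logged and skipped.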
airflow/providers/amazon/aws/transfers/glacier_to_gcs.py
@@ -22,7 +22,7 @@ from collections.abc import Sequence
  from typing import TYPE_CHECKING

  from airflow.providers.amazon.aws.hooks.glacier import GlacierHook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator
  from airflow.providers.google.cloud.hooks.gcs import GCSHook

  if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/google_api_to_s3.py
@@ -26,14 +26,11 @@ from typing import TYPE_CHECKING

  from airflow.models.xcom import XCOM_RETURN_KEY
  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator
  from airflow.providers.google.common.hooks.discovery_api import GoogleDiscoveryApiHook

  if TYPE_CHECKING:
- try:
- from airflow.sdk.types import RuntimeTaskInstanceProtocol
- except ImportError:
- from airflow.models import TaskInstance as RuntimeTaskInstanceProtocol # type: ignore[assignment]
+ from airflow.providers.common.compat.sdk import RuntimeTaskInstanceProtocol
  from airflow.utils.context import Context

  # MAX XCOM Size is 48KB
airflow/providers/amazon/aws/transfers/hive_to_dynamodb.py
@@ -24,8 +24,8 @@ from collections.abc import Callable, Sequence
  from typing import TYPE_CHECKING, Literal

  from airflow.providers.amazon.aws.hooks.dynamodb import DynamoDBHook
- from airflow.providers.amazon.version_compat import BaseOperator
  from airflow.providers.apache.hive.hooks.hive import HiveServer2Hook
+ from airflow.providers.common.compat.sdk import BaseOperator

  if TYPE_CHECKING:
  from airflow.utils.context import Context
airflow/providers/amazon/aws/transfers/http_to_s3.py
@@ -23,7 +23,7 @@ from functools import cached_property
  from typing import TYPE_CHECKING, Any

  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator
  from airflow.providers.http.hooks.http import HttpHook

  if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py
@@ -23,7 +23,7 @@ from collections.abc import Sequence
  from typing import TYPE_CHECKING

  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator
  from airflow.providers.imap.hooks.imap import ImapHook

  if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/local_to_s3.py
@@ -21,7 +21,7 @@ from collections.abc import Sequence
  from typing import TYPE_CHECKING

  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator

  if TYPE_CHECKING:
  from airflow.utils.context import Context
airflow/providers/amazon/aws/transfers/mongo_to_s3.py
@@ -24,7 +24,7 @@ from typing import TYPE_CHECKING, Any, cast
  from bson import json_util

  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator
  from airflow.providers.mongo.hooks.mongo import MongoHook

  if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/redshift_to_s3.py
@@ -28,8 +28,8 @@ from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook
  from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook
  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
  from airflow.providers.amazon.aws.utils.redshift import build_credentials_block
- from airflow.providers.amazon.version_compat import BaseOperator
- from airflow.utils.types import NOTSET, ArgNotSet
+ from airflow.providers.amazon.version_compat import NOTSET, ArgNotSet, is_arg_set
+ from airflow.providers.common.compat.sdk import BaseOperator

  if TYPE_CHECKING:
  from airflow.utils.context import Context
@@ -131,12 +131,12 @@ class RedshiftToS3Operator(BaseOperator):
  # actually provide a connection note that, because we don't want to let the exception bubble up in
  # that case (since we're silently injecting a connection on their behalf).
  self._aws_conn_id: str | None
- if isinstance(aws_conn_id, ArgNotSet):
- self.conn_set = False
- self._aws_conn_id = "aws_default"
- else:
+ if is_arg_set(aws_conn_id):
  self.conn_set = True
  self._aws_conn_id = aws_conn_id
+ else:
+ self.conn_set = False
+ self._aws_conn_id = "aws_default"

  def _build_unload_query(
  self, credentials_block: str, select_query: str, s3_key: str, unload_options: str
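This operator (and s3_to_redshift.py below) now branches on is_arg_set() from airflow.providers.amazon.version_compat instead of isinstance(aws_conn_id, ArgNotSet). The helper itself is not shown in this diff; a rough sketch of the semantics the new branches rely on, which the real implementation may express differently:

    # Assumed semantics only: is_arg_set() reads as the positive counterpart of
    # isinstance(value, ArgNotSet). The actual helper lives in
    # airflow.providers.amazon.version_compat; the sentinel types are imported
    # from airflow.utils.types here just to keep the sketch self-contained.
    from airflow.utils.types import NOTSET, ArgNotSet


    def is_arg_set(value) -> bool:
        """Return True when the caller explicitly passed a value (including None)."""
        return not isinstance(value, ArgNotSet)


    assert is_arg_set("my_aws_conn")   # explicit connection id -> conn_set = True
    assert is_arg_set(None)            # an explicit None still counts as "set"
    assert not is_arg_set(NOTSET)      # sentinel default -> fall back to "aws_default"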
airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py
@@ -24,7 +24,7 @@ from botocore.exceptions import ClientError, WaiterError

  from airflow.exceptions import AirflowException
  from airflow.providers.amazon.aws.hooks.dynamodb import DynamoDBHook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator

  if TYPE_CHECKING:
  from airflow.utils.context import Context
airflow/providers/amazon/aws/transfers/s3_to_ftp.py
@@ -22,7 +22,7 @@ from tempfile import NamedTemporaryFile
  from typing import TYPE_CHECKING

  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator
  from airflow.providers.ftp.hooks.ftp import FTPHook

  if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/s3_to_redshift.py
@@ -24,8 +24,8 @@ from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook
  from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook
  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
  from airflow.providers.amazon.aws.utils.redshift import build_credentials_block
- from airflow.providers.amazon.version_compat import BaseOperator
- from airflow.utils.types import NOTSET, ArgNotSet
+ from airflow.providers.amazon.version_compat import NOTSET, ArgNotSet, is_arg_set
+ from airflow.providers.common.compat.sdk import BaseOperator

  if TYPE_CHECKING:
  from airflow.utils.context import Context
@@ -122,12 +122,12 @@ class S3ToRedshiftOperator(BaseOperator):
  # actually provide a connection note that, because we don't want to let the exception bubble up in
  # that case (since we're silently injecting a connection on their behalf).
  self._aws_conn_id: str | None
- if isinstance(aws_conn_id, ArgNotSet):
- self.conn_set = False
- self._aws_conn_id = "aws_default"
- else:
+ if is_arg_set(aws_conn_id):
  self.conn_set = True
  self._aws_conn_id = aws_conn_id
+ else:
+ self.conn_set = False
+ self._aws_conn_id = "aws_default"

  if self.redshift_data_api_kwargs:
  for arg in ["sql", "parameters"]:
airflow/providers/amazon/aws/transfers/s3_to_sftp.py
@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING
  from urllib.parse import urlsplit

  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator
  from airflow.providers.ssh.hooks.ssh import SSHHook

  if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/s3_to_sql.py
@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING

  from airflow.exceptions import AirflowException
  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseHook, BaseOperator
+ from airflow.providers.common.compat.sdk import BaseHook, BaseOperator

  if TYPE_CHECKING:
  from airflow.utils.context import Context
airflow/providers/amazon/aws/transfers/salesforce_to_s3.py
@@ -22,7 +22,7 @@ from collections.abc import Sequence
  from typing import TYPE_CHECKING

  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator
  from airflow.providers.salesforce.hooks.salesforce import SalesforceHook

  if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/sftp_to_s3.py
@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING
  from urllib.parse import urlsplit

  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseOperator
+ from airflow.providers.common.compat.sdk import BaseOperator
  from airflow.providers.ssh.hooks.ssh import SSHHook

  if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/sql_to_s3.py
@@ -27,7 +27,7 @@ from typing import TYPE_CHECKING, Any, Literal, cast

  from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
  from airflow.providers.amazon.aws.hooks.s3 import S3Hook
- from airflow.providers.amazon.version_compat import BaseHook, BaseOperator
+ from airflow.providers.common.compat.sdk import BaseHook, BaseOperator

  if TYPE_CHECKING:
  import pandas as pd
@@ -304,12 +304,11 @@ class SqlToS3Operator(BaseOperator):
  group_df.reset_index(drop=True),
  )
  elif isinstance(df, pl.DataFrame):
- for group_label, group_df in df.group_by(**self.groupby_kwargs): # type: ignore[assignment]
- if random_column_name:
- group_df = group_df.drop(random_column_name)
+ for group_label, group_df_in in df.group_by(**self.groupby_kwargs): # type: ignore[assignment]
+ group_df2 = group_df_in.drop(random_column_name) if random_column_name else group_df_in
  yield (
  cast("str", group_label[0] if isinstance(group_label, tuple) else group_label),
- group_df,
+ group_df2,
  )

  def _get_hook(self) -> DbApiHook:
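The Polars branch above renames the loop variable instead of reassigning it, but the iteration contract is unchanged: group_by() yields (group_key, sub-DataFrame) pairs, and the helper random column is dropped from each group before it is handed on. A standalone sketch of that pattern with made-up column names:

    # Illustration only: column names and data are invented for the example.
    import polars as pl

    df = pl.DataFrame({"country": ["DE", "DE", "FR"], "amount": [1, 2, 3], "_rand": [7, 8, 9]})

    for group_label, group_df_in in df.group_by("country"):
        group_df = group_df_in.drop("_rand")  # mirrors dropping random_column_name
        label = group_label[0] if isinstance(group_label, tuple) else group_label
        print(label, group_df.height)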
airflow/providers/amazon/aws/triggers/bedrock.py
@@ -20,7 +20,7 @@ from typing import TYPE_CHECKING

  from airflow.providers.amazon.aws.hooks.bedrock import BedrockAgentHook, BedrockHook
  from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
- from airflow.utils.types import NOTSET, ArgNotSet
+ from airflow.providers.amazon.version_compat import NOTSET, ArgNotSet

  if TYPE_CHECKING:
  from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
airflow/providers/amazon/aws/triggers/s3.py
@@ -41,6 +41,11 @@ class S3KeyTrigger(BaseTrigger):
  Unix wildcard pattern
  :param aws_conn_id: reference to the s3 connection
  :param use_regex: whether to use regex to check bucket
+ :param metadata_keys: List of head_object attributes to gather and send to ``check_fn``.
+ Acceptable values: Any top level attribute returned by s3.head_object. Specify * to return
+ all available attributes.
+ Default value: "Size".
+ If the requested attribute is not found, the key is still included and the value is None.
  :param hook_params: params for hook its optional
  """

@@ -56,6 +61,7 @@
  region_name: str | None = None,
  verify: bool | str | None = None,
  botocore_config: dict | None = None,
+ metadata_keys: list[str] | None = None,
  **hook_params: Any,
  ):
  super().__init__()
@@ -70,6 +76,7 @@
  self.region_name = region_name
  self.verify = verify
  self.botocore_config = botocore_config
+ self.metadata_keys = metadata_keys if metadata_keys else ["Size", "Key"]

  def serialize(self) -> tuple[str, dict[str, Any]]:
  """Serialize S3KeyTrigger arguments and classpath."""
@@ -87,6 +94,7 @@
  "region_name": self.region_name,
  "verify": self.verify,
  "botocore_config": self.botocore_config,
+ "metadata_keys": self.metadata_keys,
  },
  )

@@ -108,11 +116,30 @@
  client, self.bucket_name, self.bucket_key, self.wildcard_match, self.use_regex
  ):
  if self.should_check_fn:
- s3_objects = await self.hook.get_files_async(
+ raw_objects = await self.hook.get_files_async(
  client, self.bucket_name, self.bucket_key, self.wildcard_match
  )
+ files = []
+ for f in raw_objects:
+ metadata = {}
+ obj = await self.hook.get_head_object_async(
+ client=client, key=f, bucket_name=self.bucket_name
+ )
+ if obj is None:
+ return
+
+ if "*" in self.metadata_keys:
+ metadata = obj
+ else:
+ for mk in self.metadata_keys:
+ if mk == "Size":
+ metadata[mk] = obj.get("ContentLength")
+ else:
+ metadata[mk] = obj.get(mk, None)
+ metadata["Key"] = f
+ files.append(metadata)
  await asyncio.sleep(self.poke_interval)
- yield TriggerEvent({"status": "running", "files": s3_objects})
+ yield TriggerEvent({"status": "running", "files": files})
  else:
  yield TriggerEvent({"status": "success"})
  return
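With metadata_keys plumbed through the deferrable path, check_fn now receives one dict per matched key, carrying the requested head_object attributes plus "Key" (with "Size" mapped from ContentLength, and None for attributes S3 does not return). A hedged sketch of a sensor wired this way; the bucket, key pattern, and size threshold are placeholders, and it assumes the sensor-side metadata_keys parameter mirrors the trigger:

    # Hedged sketch: bucket, key pattern, and threshold are placeholders.
    from airflow.providers.amazon.aws.sensors.s3 import S3KeySensor


    def all_files_big_enough(files: list[dict], **kwargs) -> bool:
        # Missing attributes arrive as None, so guard before comparing.
        return all((f.get("Size") or 0) > 1_048_576 for f in files)


    wait_for_exports = S3KeySensor(
        task_id="wait_for_exports",
        bucket_name="example-bucket",
        bucket_key="exports/*.csv",
        wildcard_match=True,
        check_fn=all_files_big_enough,
        metadata_keys=["Size", "LastModified"],
        deferrable=True,
    )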
airflow/providers/amazon/aws/triggers/ssm.py
@@ -36,6 +36,11 @@ class SsmRunCommandTrigger(AwsBaseWaiterTrigger):
  :param waiter_delay: The amount of time in seconds to wait between attempts. (default: 120)
  :param waiter_max_attempts: The maximum number of attempts to be made. (default: 75)
  :param aws_conn_id: The Airflow connection used for AWS credentials.
+ :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+ :param verify: Whether or not to verify SSL certificates. See:
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+ :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+ https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
  """

  def __init__(
@@ -45,6 +50,9 @@ class SsmRunCommandTrigger(AwsBaseWaiterTrigger):
  waiter_delay: int = 120,
  waiter_max_attempts: int = 75,
  aws_conn_id: str | None = None,
+ region_name: str | None = None,
+ verify: bool | str | None = None,
+ botocore_config: dict | None = None,
  ) -> None:
  super().__init__(
  serialized_fields={"command_id": command_id},
@@ -58,11 +66,19 @@
  waiter_delay=waiter_delay,
  waiter_max_attempts=waiter_max_attempts,
  aws_conn_id=aws_conn_id,
+ region_name=region_name,
+ verify=verify,
+ botocore_config=botocore_config,
  )
  self.command_id = command_id

  def hook(self) -> AwsGenericHook:
- return SsmHook(aws_conn_id=self.aws_conn_id)
+ return SsmHook(
+ aws_conn_id=self.aws_conn_id,
+ region_name=self.region_name,
+ verify=self.verify,
+ config=self.botocore_config,
+ )

  async def run(self) -> AsyncIterator[TriggerEvent]:
  hook = self.hook()
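The new region_name, verify, and botocore_config arguments are forwarded both to the waiter base class and to the SsmHook built in hook(). A minimal construction sketch with placeholder values; only the keyword names come from the signature added above:

    # Placeholder command id, region, and retry settings.
    from airflow.providers.amazon.aws.triggers.ssm import SsmRunCommandTrigger

    trigger = SsmRunCommandTrigger(
        command_id="11111111-2222-3333-4444-555555555555",
        waiter_delay=60,
        waiter_max_attempts=30,
        aws_conn_id="aws_default",
        region_name="eu-central-1",
        verify=True,
        botocore_config={"retries": {"max_attempts": 10, "mode": "adaptive"}},
    )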
airflow/providers/amazon/aws/utils/connection_wrapper.py
@@ -28,14 +28,11 @@ from botocore.config import Config

  from airflow.exceptions import AirflowException
  from airflow.providers.amazon.aws.utils import trim_none_values
+ from airflow.providers.amazon.version_compat import NOTSET, ArgNotSet
  from airflow.utils.log.logging_mixin import LoggingMixin
- from airflow.utils.types import NOTSET, ArgNotSet

  if TYPE_CHECKING:
- try:
- from airflow.sdk import Connection
- except ImportError:
- from airflow.models.connection import Connection # type: ignore[assignment]
+ from airflow.providers.common.compat.sdk import Connection


  @dataclass
airflow/providers/amazon/aws/utils/mixins.py
@@ -158,4 +158,4 @@ def aws_template_fields(*template_fields: str) -> tuple[str, ...]:
  f"{', '.join(map(repr, template_fields))}."
  )
  raise TypeError(msg)
- return tuple(sorted(list({"aws_conn_id", "region_name", "verify"} | set(template_fields))))
+ return tuple(sorted({"aws_conn_id", "region_name", "verify"} | set(template_fields)))
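The simplification drops a redundant list() call; the result is still the sorted union of the AWS base fields and the extras, e.g.:

    # Behaviour is unchanged by the cleanup; the extra field names are illustrative.
    from airflow.providers.amazon.aws.utils.mixins import aws_template_fields

    assert aws_template_fields("s3_bucket", "s3_key") == (
        "aws_conn_id",
        "region_name",
        "s3_bucket",
        "s3_key",
        "verify",
    )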
airflow/providers/amazon/aws/utils/waiter.py
@@ -51,8 +51,8 @@ def waiter(
  :param desired_state: Wait until the getter returns this value
  :param failure_states: A set of states which indicate failure and should throw an
  exception if any are reached before the desired_state
- :param object_type: Used for the reporting string. What are you waiting for? (application, job, etc)
- :param action: Used for the reporting string. What action are you waiting for? (created, deleted, etc)
+ :param object_type: Used for the reporting string. What are you waiting for? (application, job, etc.)
+ :param action: Used for the reporting string. What action are you waiting for? (created, deleted, etc.)
  :param countdown: Number of seconds the waiter should wait for the desired state before timing out.
  Defaults to 25 * 60 seconds. None = infinite.
  :param check_interval_seconds: Number of seconds waiter should wait before attempting
airflow/providers/amazon/aws/waiters/emr.json
@@ -8,19 +8,19 @@
  "acceptors": [
  {
  "matcher": "path",
- "argument": "notebookexecution.status",
+ "argument": "NotebookExecution.Status",
  "expected": "RUNNING",
  "state": "success"
  },
  {
  "matcher": "path",
- "argument": "notebookexecution.status",
+ "argument": "NotebookExecution.Status",
  "expected": "FINISHED",
  "state": "success"
  },
  {
  "matcher": "path",
- "argument": "notebookexecution.status",
+ "argument": "NotebookExecution.Status",
  "expected": "FAILED",
  "state": "failure"
  }
@@ -33,19 +33,19 @@
  "acceptors": [
  {
  "matcher": "path",
- "argument": "notebookexecution.status",
+ "argument": "NotebookExecution.Status",
  "expected": "STOPPED",
  "state": "success"
  },
  {
  "matcher": "path",
- "argument": "notebookexecution.status",
+ "argument": "NotebookExecution.Status",
  "expected": "FINISHED",
  "state": "success"
  },
  {
  "matcher": "path",
- "argument": "notebookexecution.status",
+ "argument": "NotebookExecution.Status",
  "expected": "FAILED",
  "state": "failure"
  }
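The waiter fix is purely a casing correction: the "argument" field is a JMESPath expression evaluated against the boto3 DescribeNotebookExecution response, whose keys are PascalCase, so the lowercase form never matched and the acceptors could not fire. A quick way to confirm the response shape (client, region, and execution id are placeholders):

    # Placeholders throughout; shows only where NotebookExecution.Status sits in
    # the DescribeNotebookExecution response that the waiter inspects.
    import boto3

    emr = boto3.client("emr", region_name="us-east-1")
    resp = emr.describe_notebook_execution(NotebookExecutionId="ex-EXAMPLE1234567890")
    print(resp["NotebookExecution"]["Status"])  # e.g. "RUNNING", "FINISHED", "FAILED"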
airflow/providers/amazon/get_provider_info.py
@@ -340,6 +340,20 @@ def get_provider_info():
  "how-to-guide": ["/docs/apache-airflow-providers-amazon/operators/neptune.rst"],
  "tags": ["aws"],
  },
+ {
+ "integration-name": "Amazon Kinesis Data Stream",
+ "external-doc-url": "https://aws.amazon.com/kinesis/",
+ "logo": "/docs/integration-logos/Amazon-Kinesis-Data-Firehose_light-bg@4x.png",
+ "how-to-guide": ["/docs/apache-airflow-providers-amazon/operators/kinesis_analytics.rst"],
+ "tags": ["aws"],
+ },
+ {
+ "integration-name": "Amazon Managed Workflows for Apache Airflow (MWAA)",
+ "external-doc-url": "https://aws.amazon.com/managed-workflows-for-apache-airflow/",
+ "logo": "/docs/integration-logos/Amazon-MWAA.png",
+ "how-to-guide": ["/docs/apache-airflow-providers-amazon/operators/mwaa.rst"],
+ "tags": ["aws"],
+ },
  ],
  "operators": [
  {
@@ -695,9 +709,13 @@
  ],
  },
  {
- "integration-name": "Amazon Kinesis Data Firehose",
+ "integration-name": "Amazon Kinesis Data Stream",
  "python-modules": ["airflow.providers.amazon.aws.hooks.kinesis"],
  },
+ {
+ "integration-name": "Amazon Kinesis Data Firehose",
+ "python-modules": ["airflow.providers.amazon.aws.hooks.firehose"],
+ },
  {
  "integration-name": "AWS Lambda",
  "python-modules": ["airflow.providers.amazon.aws.hooks.lambda_function"],