apache-airflow-providers-amazon 9.15.0__py3-none-any.whl → 9.18.1rc1__py3-none-any.whl

This diff shows the changes between two package versions that have been publicly released to one of the supported registries, as they appear in those registries. It is provided for informational purposes only.
Files changed (138)
  1. airflow/providers/amazon/__init__.py +3 -3
  2. airflow/providers/amazon/aws/auth_manager/avp/facade.py +1 -1
  3. airflow/providers/amazon/aws/auth_manager/routes/login.py +7 -1
  4. airflow/providers/amazon/aws/bundles/s3.py +1 -1
  5. airflow/providers/amazon/aws/exceptions.py +1 -1
  6. airflow/providers/amazon/aws/executors/aws_lambda/docker/app.py +5 -1
  7. airflow/providers/amazon/aws/executors/aws_lambda/lambda_executor.py +1 -2
  8. airflow/providers/amazon/aws/executors/batch/batch_executor.py +1 -2
  9. airflow/providers/amazon/aws/executors/ecs/ecs_executor.py +1 -2
  10. airflow/providers/amazon/aws/hooks/athena.py +7 -3
  11. airflow/providers/amazon/aws/hooks/athena_sql.py +3 -3
  12. airflow/providers/amazon/aws/hooks/base_aws.py +3 -7
  13. airflow/providers/amazon/aws/hooks/batch_client.py +5 -7
  14. airflow/providers/amazon/aws/hooks/batch_waiters.py +1 -2
  15. airflow/providers/amazon/aws/hooks/chime.py +2 -2
  16. airflow/providers/amazon/aws/hooks/comprehend.py +1 -1
  17. airflow/providers/amazon/aws/hooks/datasync.py +3 -3
  18. airflow/providers/amazon/aws/hooks/dynamodb.py +1 -1
  19. airflow/providers/amazon/aws/hooks/ec2.py +1 -1
  20. airflow/providers/amazon/aws/hooks/elasticache_replication_group.py +1 -1
  21. airflow/providers/amazon/aws/hooks/emr.py +1 -1
  22. airflow/providers/amazon/aws/hooks/firehose.py +56 -0
  23. airflow/providers/amazon/aws/hooks/glue.py +9 -2
  24. airflow/providers/amazon/aws/hooks/glue_catalog.py +1 -1
  25. airflow/providers/amazon/aws/hooks/kinesis.py +31 -13
  26. airflow/providers/amazon/aws/hooks/logs.py +10 -2
  27. airflow/providers/amazon/aws/hooks/mwaa.py +38 -7
  28. airflow/providers/amazon/aws/hooks/quicksight.py +1 -1
  29. airflow/providers/amazon/aws/hooks/rds.py +1 -1
  30. airflow/providers/amazon/aws/hooks/redshift_sql.py +31 -8
  31. airflow/providers/amazon/aws/hooks/s3.py +14 -6
  32. airflow/providers/amazon/aws/hooks/sagemaker.py +1 -1
  33. airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py +1 -2
  34. airflow/providers/amazon/aws/hooks/ssm.py +34 -6
  35. airflow/providers/amazon/aws/hooks/step_function.py +1 -1
  36. airflow/providers/amazon/aws/links/base_aws.py +1 -1
  37. airflow/providers/amazon/aws/links/emr.py +1 -1
  38. airflow/providers/amazon/aws/log/cloudwatch_task_handler.py +50 -20
  39. airflow/providers/amazon/aws/operators/appflow.py +1 -1
  40. airflow/providers/amazon/aws/operators/athena.py +1 -1
  41. airflow/providers/amazon/aws/operators/base_aws.py +2 -2
  42. airflow/providers/amazon/aws/operators/batch.py +1 -1
  43. airflow/providers/amazon/aws/operators/bedrock.py +3 -1
  44. airflow/providers/amazon/aws/operators/cloud_formation.py +2 -2
  45. airflow/providers/amazon/aws/operators/comprehend.py +1 -1
  46. airflow/providers/amazon/aws/operators/datasync.py +1 -1
  47. airflow/providers/amazon/aws/operators/dms.py +1 -1
  48. airflow/providers/amazon/aws/operators/ec2.py +1 -1
  49. airflow/providers/amazon/aws/operators/ecs.py +1 -1
  50. airflow/providers/amazon/aws/operators/eks.py +2 -1
  51. airflow/providers/amazon/aws/operators/emr.py +22 -12
  52. airflow/providers/amazon/aws/operators/eventbridge.py +1 -1
  53. airflow/providers/amazon/aws/operators/glue.py +1 -1
  54. airflow/providers/amazon/aws/operators/glue_crawler.py +1 -1
  55. airflow/providers/amazon/aws/operators/glue_databrew.py +1 -1
  56. airflow/providers/amazon/aws/operators/kinesis_analytics.py +1 -1
  57. airflow/providers/amazon/aws/operators/lambda_function.py +1 -1
  58. airflow/providers/amazon/aws/operators/mwaa.py +13 -4
  59. airflow/providers/amazon/aws/operators/neptune.py +1 -1
  60. airflow/providers/amazon/aws/operators/rds.py +1 -1
  61. airflow/providers/amazon/aws/operators/redshift_cluster.py +1 -1
  62. airflow/providers/amazon/aws/operators/redshift_data.py +1 -1
  63. airflow/providers/amazon/aws/operators/s3.py +1 -1
  64. airflow/providers/amazon/aws/operators/sagemaker.py +1 -1
  65. airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py +1 -2
  66. airflow/providers/amazon/aws/operators/ssm.py +122 -17
  67. airflow/providers/amazon/aws/operators/step_function.py +1 -1
  68. airflow/providers/amazon/aws/secrets/secrets_manager.py +3 -4
  69. airflow/providers/amazon/aws/sensors/athena.py +1 -1
  70. airflow/providers/amazon/aws/sensors/base_aws.py +2 -2
  71. airflow/providers/amazon/aws/sensors/batch.py +1 -1
  72. airflow/providers/amazon/aws/sensors/bedrock.py +1 -1
  73. airflow/providers/amazon/aws/sensors/comprehend.py +1 -1
  74. airflow/providers/amazon/aws/sensors/dms.py +1 -1
  75. airflow/providers/amazon/aws/sensors/ec2.py +1 -1
  76. airflow/providers/amazon/aws/sensors/ecs.py +1 -1
  77. airflow/providers/amazon/aws/sensors/eks.py +2 -1
  78. airflow/providers/amazon/aws/sensors/emr.py +1 -3
  79. airflow/providers/amazon/aws/sensors/glacier.py +1 -1
  80. airflow/providers/amazon/aws/sensors/glue.py +1 -1
  81. airflow/providers/amazon/aws/sensors/glue_catalog_partition.py +1 -1
  82. airflow/providers/amazon/aws/sensors/glue_crawler.py +1 -1
  83. airflow/providers/amazon/aws/sensors/kinesis_analytics.py +1 -1
  84. airflow/providers/amazon/aws/sensors/lambda_function.py +1 -1
  85. airflow/providers/amazon/aws/sensors/mwaa.py +15 -2
  86. airflow/providers/amazon/aws/sensors/opensearch_serverless.py +1 -1
  87. airflow/providers/amazon/aws/sensors/quicksight.py +1 -1
  88. airflow/providers/amazon/aws/sensors/rds.py +1 -1
  89. airflow/providers/amazon/aws/sensors/redshift_cluster.py +1 -1
  90. airflow/providers/amazon/aws/sensors/s3.py +3 -3
  91. airflow/providers/amazon/aws/sensors/sagemaker.py +1 -1
  92. airflow/providers/amazon/aws/sensors/sagemaker_unified_studio.py +1 -2
  93. airflow/providers/amazon/aws/sensors/sqs.py +1 -1
  94. airflow/providers/amazon/aws/sensors/ssm.py +33 -17
  95. airflow/providers/amazon/aws/sensors/step_function.py +1 -1
  96. airflow/providers/amazon/aws/transfers/azure_blob_to_s3.py +3 -3
  97. airflow/providers/amazon/aws/transfers/base.py +5 -5
  98. airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py +4 -4
  99. airflow/providers/amazon/aws/transfers/exasol_to_s3.py +1 -1
  100. airflow/providers/amazon/aws/transfers/ftp_to_s3.py +1 -1
  101. airflow/providers/amazon/aws/transfers/gcs_to_s3.py +48 -6
  102. airflow/providers/amazon/aws/transfers/glacier_to_gcs.py +1 -1
  103. airflow/providers/amazon/aws/transfers/google_api_to_s3.py +2 -5
  104. airflow/providers/amazon/aws/transfers/hive_to_dynamodb.py +1 -1
  105. airflow/providers/amazon/aws/transfers/http_to_s3.py +1 -1
  106. airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py +1 -1
  107. airflow/providers/amazon/aws/transfers/local_to_s3.py +1 -1
  108. airflow/providers/amazon/aws/transfers/mongo_to_s3.py +1 -1
  109. airflow/providers/amazon/aws/transfers/redshift_to_s3.py +6 -7
  110. airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py +1 -2
  111. airflow/providers/amazon/aws/transfers/s3_to_ftp.py +1 -1
  112. airflow/providers/amazon/aws/transfers/s3_to_redshift.py +6 -7
  113. airflow/providers/amazon/aws/transfers/s3_to_sftp.py +1 -1
  114. airflow/providers/amazon/aws/transfers/s3_to_sql.py +1 -2
  115. airflow/providers/amazon/aws/transfers/salesforce_to_s3.py +1 -1
  116. airflow/providers/amazon/aws/transfers/sftp_to_s3.py +1 -1
  117. airflow/providers/amazon/aws/transfers/sql_to_s3.py +8 -9
  118. airflow/providers/amazon/aws/triggers/bedrock.py +1 -1
  119. airflow/providers/amazon/aws/triggers/ecs.py +1 -1
  120. airflow/providers/amazon/aws/triggers/eks.py +1 -1
  121. airflow/providers/amazon/aws/triggers/s3.py +29 -2
  122. airflow/providers/amazon/aws/triggers/sagemaker.py +1 -1
  123. airflow/providers/amazon/aws/triggers/sqs.py +1 -1
  124. airflow/providers/amazon/aws/triggers/ssm.py +17 -1
  125. airflow/providers/amazon/aws/utils/__init__.py +1 -1
  126. airflow/providers/amazon/aws/utils/connection_wrapper.py +3 -6
  127. airflow/providers/amazon/aws/utils/mixins.py +1 -1
  128. airflow/providers/amazon/aws/utils/waiter.py +3 -3
  129. airflow/providers/amazon/aws/utils/waiter_with_logging.py +1 -1
  130. airflow/providers/amazon/aws/waiters/emr.json +6 -6
  131. airflow/providers/amazon/get_provider_info.py +19 -1
  132. airflow/providers/amazon/version_compat.py +19 -16
  133. {apache_airflow_providers_amazon-9.15.0.dist-info → apache_airflow_providers_amazon-9.18.1rc1.dist-info}/METADATA +29 -19
  134. {apache_airflow_providers_amazon-9.15.0.dist-info → apache_airflow_providers_amazon-9.18.1rc1.dist-info}/RECORD +138 -136
  135. apache_airflow_providers_amazon-9.18.1rc1.dist-info/licenses/NOTICE +5 -0
  136. {apache_airflow_providers_amazon-9.15.0.dist-info → apache_airflow_providers_amazon-9.18.1rc1.dist-info}/WHEEL +0 -0
  137. {apache_airflow_providers_amazon-9.15.0.dist-info → apache_airflow_providers_amazon-9.18.1rc1.dist-info}/entry_points.txt +0 -0
  138. {airflow/providers/amazon → apache_airflow_providers_amazon-9.18.1rc1.dist-info/licenses}/LICENSE +0 -0
airflow/providers/amazon/aws/transfers/ftp_to_s3.py
@@ -22,7 +22,7 @@ from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING
 
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import BaseOperator
 from airflow.providers.ftp.hooks.ftp import FTPHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/gcs_to_s3.py
@@ -25,9 +25,8 @@ from typing import TYPE_CHECKING
 
 from packaging.version import Version
 
-from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import AirflowException, BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 
 if TYPE_CHECKING:
@@ -39,6 +38,11 @@ class GCSToS3Operator(BaseOperator):
     """
     Synchronizes a Google Cloud Storage bucket with an S3 bucket.
 
+    .. note::
+        When flatten_structure=True, it takes precedence over keep_directory_structure.
+        For example, with flatten_structure=True, "folder/subfolder/file.txt" becomes "file.txt"
+        regardless of the keep_directory_structure setting.
+
     .. seealso::
         For more information on how to use this operator, take a look at the guide:
         :ref:`howto/operator:GCSToS3Operator`
@@ -79,6 +83,9 @@ class GCSToS3Operator(BaseOperator):
        object to be uploaded in S3
    :param keep_directory_structure: (Optional) When set to False the path of the file
        on the bucket is recreated within path passed in dest_s3_key.
+   :param flatten_structure: (Optional) When set to True, places all files directly
+       in the dest_s3_key directory without preserving subdirectory structure.
+       Takes precedence over keep_directory_structure when enabled.
    :param match_glob: (Optional) filters objects based on the glob pattern given by the string
        (e.g, ``'**/*/.json'``)
    :param gcp_user_project: (Optional) The identifier of the Google Cloud project to bill for this request.
@@ -108,6 +115,7 @@ class GCSToS3Operator(BaseOperator):
         dest_s3_extra_args: dict | None = None,
         s3_acl_policy: str | None = None,
         keep_directory_structure: bool = True,
+        flatten_structure: bool = False,
         match_glob: str | None = None,
         gcp_user_project: str | None = None,
         **kwargs,
@@ -124,6 +132,10 @@ class GCSToS3Operator(BaseOperator):
         self.dest_s3_extra_args = dest_s3_extra_args or {}
         self.s3_acl_policy = s3_acl_policy
         self.keep_directory_structure = keep_directory_structure
+        self.flatten_structure = flatten_structure
+
+        if self.flatten_structure and self.keep_directory_structure:
+            self.log.warning("flatten_structure=True takes precedence over keep_directory_structure=True")
         try:
             from airflow.providers.google import __version__ as _GOOGLE_PROVIDER_VERSION
 
@@ -140,6 +152,17 @@ class GCSToS3Operator(BaseOperator):
         self.match_glob = match_glob
         self.gcp_user_project = gcp_user_project
 
+    def _transform_file_path(self, file_path: str) -> str:
+        """
+        Transform the GCS file path according to the specified options.
+
+        :param file_path: The original GCS file path
+        :return: The transformed file path for S3 destination
+        """
+        if self.flatten_structure:
+            return os.path.basename(file_path)
+        return file_path
+
     def execute(self, context: Context) -> list[str]:
         # list all files in an Google Cloud Storage bucket
         gcs_hook = GCSHook(
@@ -167,7 +190,7 @@ class GCSToS3Operator(BaseOperator):
             aws_conn_id=self.dest_aws_conn_id, verify=self.dest_verify, extra_args=self.dest_s3_extra_args
         )
 
-        if not self.keep_directory_structure and self.prefix:
+        if not self.keep_directory_structure and self.prefix and not self.flatten_structure:
             self.dest_s3_key = os.path.join(self.dest_s3_key, self.prefix)
 
         if not self.replace:
@@ -187,15 +210,34 @@ class GCSToS3Operator(BaseOperator):
             existing_files = existing_files or []
             # remove the prefix for the existing files to allow the match
             existing_files = [file.replace(prefix, "", 1) for file in existing_files]
-            gcs_files = list(set(gcs_files) - set(existing_files))
+
+            # Transform GCS files for comparison and filter out existing ones
+            existing_files_set = set(existing_files)
+            filtered_files = []
+            seen_transformed = set()
+
+            for file in gcs_files:
+                transformed = self._transform_file_path(file)
+                if transformed not in existing_files_set and transformed not in seen_transformed:
+                    filtered_files.append(file)
+                    seen_transformed.add(transformed)
+                elif transformed in seen_transformed:
+                    self.log.warning(
+                        "Skipping duplicate file %s (transforms to %s)",
+                        file,
+                        transformed,
+                    )
+
+            gcs_files = filtered_files
 
         if gcs_files:
             for file in gcs_files:
                 with gcs_hook.provide_file(
                     object_name=file, bucket_name=str(self.gcs_bucket), user_project=self.gcp_user_project
                 ) as local_tmp_file:
-                    dest_key = os.path.join(self.dest_s3_key, file)
-                    self.log.info("Saving file to %s", dest_key)
+                    transformed_path = self._transform_file_path(file)
+                    dest_key = os.path.join(self.dest_s3_key, transformed_path)
+                    self.log.info("Saving file from %s to %s", file, dest_key)
                     s3_hook.load_file(
                         filename=local_tmp_file.name,
                         key=dest_key,
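A hedged usage sketch of the new flatten_structure flag introduced in the gcs_to_s3 hunks above; the bucket names, prefix, and connection ids are placeholders, and the remaining parameters are taken from the operator signature shown in this diff.

from airflow.providers.amazon.aws.transfers.gcs_to_s3 import GCSToS3Operator

# Placeholder buckets and connection ids, for illustration only.
copy_flat = GCSToS3Operator(
    task_id="gcs_to_s3_flattened",
    gcs_bucket="example-gcs-bucket",
    prefix="exports/2024/",
    dest_s3_key="s3://example-s3-bucket/landing/",
    dest_aws_conn_id="aws_default",
    flatten_structure=True,  # "exports/2024/part-0001.csv" is uploaded as "landing/part-0001.csv"
    replace=False,
)

Per the filtering logic above, when replace is False any GCS objects whose names collide after flattening are deduplicated: only the first is uploaded, the rest are skipped with a warning.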
airflow/providers/amazon/aws/transfers/glacier_to_gcs.py
@@ -22,7 +22,7 @@ from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
 from airflow.providers.amazon.aws.hooks.glacier import GlacierHook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/google_api_to_s3.py
@@ -26,14 +26,11 @@ from typing import TYPE_CHECKING
 
 from airflow.models.xcom import XCOM_RETURN_KEY
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import BaseOperator
 from airflow.providers.google.common.hooks.discovery_api import GoogleDiscoveryApiHook
 
 if TYPE_CHECKING:
-    try:
-        from airflow.sdk.types import RuntimeTaskInstanceProtocol
-    except ImportError:
-        from airflow.models import TaskInstance as RuntimeTaskInstanceProtocol  # type: ignore[assignment]
+    from airflow.providers.common.compat.sdk import RuntimeTaskInstanceProtocol
     from airflow.utils.context import Context
 
 # MAX XCOM Size is 48KB
airflow/providers/amazon/aws/transfers/hive_to_dynamodb.py
@@ -24,8 +24,8 @@ from collections.abc import Callable, Sequence
 from typing import TYPE_CHECKING, Literal
 
 from airflow.providers.amazon.aws.hooks.dynamodb import DynamoDBHook
-from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.apache.hive.hooks.hive import HiveServer2Hook
+from airflow.providers.common.compat.sdk import BaseOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/amazon/aws/transfers/http_to_s3.py
@@ -23,7 +23,7 @@ from functools import cached_property
 from typing import TYPE_CHECKING, Any
 
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import BaseOperator
 from airflow.providers.http.hooks.http import HttpHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py
@@ -23,7 +23,7 @@ from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import BaseOperator
 from airflow.providers.imap.hooks.imap import ImapHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/local_to_s3.py
@@ -21,7 +21,7 @@ from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import BaseOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/amazon/aws/transfers/mongo_to_s3.py
@@ -24,7 +24,7 @@ from typing import TYPE_CHECKING, Any, cast
 from bson import json_util
 
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import BaseOperator
 from airflow.providers.mongo.hooks.mongo import MongoHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/redshift_to_s3.py
@@ -23,13 +23,12 @@ import re
 from collections.abc import Iterable, Mapping, Sequence
 from typing import TYPE_CHECKING
 
-from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook
 from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
 from airflow.providers.amazon.aws.utils.redshift import build_credentials_block
-from airflow.providers.amazon.version_compat import BaseOperator
-from airflow.utils.types import NOTSET, ArgNotSet
+from airflow.providers.amazon.version_compat import NOTSET, ArgNotSet, is_arg_set
+from airflow.providers.common.compat.sdk import AirflowException, BaseOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -131,12 +130,12 @@ class RedshiftToS3Operator(BaseOperator):
         # actually provide a connection note that, because we don't want to let the exception bubble up in
         # that case (since we're silently injecting a connection on their behalf).
         self._aws_conn_id: str | None
-        if isinstance(aws_conn_id, ArgNotSet):
-            self.conn_set = False
-            self._aws_conn_id = "aws_default"
-        else:
+        if is_arg_set(aws_conn_id):
             self.conn_set = True
             self._aws_conn_id = aws_conn_id
+        else:
+            self.conn_set = False
+            self._aws_conn_id = "aws_default"
 
     def _build_unload_query(
         self, credentials_block: str, select_query: str, s3_key: str, unload_options: str
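The is_arg_set helper now imported from airflow.providers.amazon.version_compat is not itself shown in this diff; judging from the branch it replaces, its semantics are roughly the sketch below. This is an assumption for illustration, not the provider's actual implementation.

from airflow.utils.types import ArgNotSet  # sentinel type used by the replaced isinstance check

def is_arg_set(value) -> bool:
    """Sketch only: treat anything other than the NOTSET sentinel as an explicitly passed argument."""
    return not isinstance(value, ArgNotSet)

# With that reading the operator branch keeps its old behaviour: an explicitly passed
# aws_conn_id is used as-is, otherwise the operator falls back to "aws_default".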
airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py
@@ -22,9 +22,8 @@ from typing import TYPE_CHECKING, Any, Literal, TypedDict
 
 from botocore.exceptions import ClientError, WaiterError
 
-from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.dynamodb import DynamoDBHook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import AirflowException, BaseOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/amazon/aws/transfers/s3_to_ftp.py
@@ -22,7 +22,7 @@ from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING
 
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import BaseOperator
 from airflow.providers.ftp.hooks.ftp import FTPHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/s3_to_redshift.py
@@ -19,13 +19,12 @@ from __future__ import annotations
 from collections.abc import Iterable, Sequence
 from typing import TYPE_CHECKING
 
-from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook
 from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
 from airflow.providers.amazon.aws.utils.redshift import build_credentials_block
-from airflow.providers.amazon.version_compat import BaseOperator
-from airflow.utils.types import NOTSET, ArgNotSet
+from airflow.providers.amazon.version_compat import NOTSET, ArgNotSet, is_arg_set
+from airflow.providers.common.compat.sdk import AirflowException, BaseOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -122,12 +121,12 @@ class S3ToRedshiftOperator(BaseOperator):
         # actually provide a connection note that, because we don't want to let the exception bubble up in
         # that case (since we're silently injecting a connection on their behalf).
         self._aws_conn_id: str | None
-        if isinstance(aws_conn_id, ArgNotSet):
-            self.conn_set = False
-            self._aws_conn_id = "aws_default"
-        else:
+        if is_arg_set(aws_conn_id):
             self.conn_set = True
             self._aws_conn_id = aws_conn_id
+        else:
+            self.conn_set = False
+            self._aws_conn_id = "aws_default"
 
         if self.redshift_data_api_kwargs:
             for arg in ["sql", "parameters"]:
airflow/providers/amazon/aws/transfers/s3_to_sftp.py
@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING
 from urllib.parse import urlsplit
 
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import BaseOperator
 from airflow.providers.ssh.hooks.ssh import SSHHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/s3_to_sql.py
@@ -21,9 +21,8 @@ from functools import cached_property
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING
 
-from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseHook, BaseOperator
+from airflow.providers.common.compat.sdk import AirflowException, BaseHook, BaseOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/amazon/aws/transfers/salesforce_to_s3.py
@@ -22,7 +22,7 @@ from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import BaseOperator
 from airflow.providers.salesforce.hooks.salesforce import SalesforceHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/sftp_to_s3.py
@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING
 from urllib.parse import urlsplit
 
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseOperator
+from airflow.providers.common.compat.sdk import BaseOperator
 from airflow.providers.ssh.hooks.ssh import SSHHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/sql_to_s3.py
@@ -25,9 +25,9 @@ from collections import namedtuple
 from collections.abc import Iterable, Mapping, Sequence
 from typing import TYPE_CHECKING, Any, Literal, cast
 
-from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
+from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
-from airflow.providers.amazon.version_compat import BaseHook, BaseOperator
+from airflow.providers.common.compat.sdk import AirflowException, BaseHook, BaseOperator
 
 if TYPE_CHECKING:
     import pandas as pd
@@ -191,7 +191,7 @@ class SqlToS3Operator(BaseOperator):
            if df[col].dtype.name == "object" and file_format == FILE_FORMAT.PARQUET:
                # if the type wasn't identified or converted, change it to a string so if can still be
                # processed.
-                df[col] = df[col].astype(str)
+                df[col] = cast("pd.Series", df[col].astype(str))  # type: ignore[call-overload]
 
            if "float" in df[col].dtype.name and df[col].hasnans:
                # inspect values to determine if dtype of non-null values is int or float
@@ -201,13 +201,13 @@ class SqlToS3Operator(BaseOperator):
                    # The type ignore can be removed here if https://github.com/numpy/numpy/pull/23690
                    # is merged and released as currently NumPy does not consider None as valid for x/y.
                    df[col] = np.where(df[col].isnull(), None, df[col])  # type: ignore[call-overload]
-                    df[col] = df[col].astype(pd.Int64Dtype())
+                    df[col] = cast("pd.Series", df[col].astype(pd.Int64Dtype()))  # type: ignore[call-overload]
                elif np.isclose(notna_series, notna_series.astype(int)).all():
                    # set to float dtype that retains floats and supports NaNs
                    # The type ignore can be removed here if https://github.com/numpy/numpy/pull/23690
                    # is merged and released
                    df[col] = np.where(df[col].isnull(), None, df[col])  # type: ignore[call-overload]
-                    df[col] = df[col].astype(pd.Float64Dtype())
+                    df[col] = cast("pd.Series", df[col].astype(pd.Float64Dtype()))  # type: ignore[call-overload]
 
    @staticmethod
    def _strip_suffixes(
@@ -304,12 +304,11 @@ class SqlToS3Operator(BaseOperator):
                    group_df.reset_index(drop=True),
                )
        elif isinstance(df, pl.DataFrame):
-            for group_label, group_df in df.group_by(**self.groupby_kwargs):  # type: ignore[assignment]
-                if random_column_name:
-                    group_df = group_df.drop(random_column_name)
+            for group_label, group_df_in in df.group_by(**self.groupby_kwargs):  # type: ignore[assignment]
+                group_df2 = group_df_in.drop(random_column_name) if random_column_name else group_df_in
                yield (
                    cast("str", group_label[0] if isinstance(group_label, tuple) else group_label),
-                    group_df,
+                    group_df2,
                )
 
    def _get_hook(self) -> DbApiHook:
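The nullable-dtype casts in the sql_to_s3 hunks above exist so that integer columns containing SQL NULLs survive the Parquet conversion instead of being silently widened to floats. A small standalone pandas illustration of that behaviour, not provider code:

import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1.0, 2.0, np.nan]})  # NULL arrives as NaN, forcing float64

# Mirror the provider's two-step conversion: NaN -> None, then cast to the nullable Int64 dtype.
df["a"] = np.where(df["a"].isnull(), None, df["a"])
df["a"] = df["a"].astype(pd.Int64Dtype())

print(df["a"].dtype)     # Int64
print(df["a"].tolist())  # [1, 2, <NA>]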
airflow/providers/amazon/aws/triggers/bedrock.py
@@ -20,7 +20,7 @@ from typing import TYPE_CHECKING
 
 from airflow.providers.amazon.aws.hooks.bedrock import BedrockAgentHook, BedrockHook
 from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
-from airflow.utils.types import NOTSET, ArgNotSet
+from airflow.providers.amazon.version_compat import NOTSET, ArgNotSet
 
 if TYPE_CHECKING:
     from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
airflow/providers/amazon/aws/triggers/ecs.py
@@ -23,11 +23,11 @@ from typing import TYPE_CHECKING, Any
 
 from botocore.exceptions import ClientError, WaiterError
 
-from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.ecs import EcsHook
 from airflow.providers.amazon.aws.hooks.logs import AwsLogsHook
 from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
 from airflow.providers.amazon.aws.utils.task_log_fetcher import AwsTaskLogFetcher
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/triggers/eks.py
@@ -20,10 +20,10 @@ from typing import TYPE_CHECKING, Any
 
 from botocore.exceptions import ClientError
 
-from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.eks import EksHook
 from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
 from airflow.providers.amazon.aws.utils.waiter_with_logging import async_wait
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.triggers.base import TriggerEvent
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/triggers/s3.py
@@ -41,6 +41,11 @@ class S3KeyTrigger(BaseTrigger):
        Unix wildcard pattern
    :param aws_conn_id: reference to the s3 connection
    :param use_regex: whether to use regex to check bucket
+   :param metadata_keys: List of head_object attributes to gather and send to ``check_fn``.
+       Acceptable values: Any top level attribute returned by s3.head_object. Specify * to return
+       all available attributes.
+       Default value: "Size".
+       If the requested attribute is not found, the key is still included and the value is None.
    :param hook_params: params for hook its optional
    """
 
@@ -56,6 +61,7 @@ class S3KeyTrigger(BaseTrigger):
        region_name: str | None = None,
        verify: bool | str | None = None,
        botocore_config: dict | None = None,
+        metadata_keys: list[str] | None = None,
        **hook_params: Any,
    ):
        super().__init__()
@@ -70,6 +76,7 @@ class S3KeyTrigger(BaseTrigger):
        self.region_name = region_name
        self.verify = verify
        self.botocore_config = botocore_config
+        self.metadata_keys = metadata_keys if metadata_keys else ["Size", "Key"]
 
    def serialize(self) -> tuple[str, dict[str, Any]]:
        """Serialize S3KeyTrigger arguments and classpath."""
@@ -87,6 +94,7 @@ class S3KeyTrigger(BaseTrigger):
                "region_name": self.region_name,
                "verify": self.verify,
                "botocore_config": self.botocore_config,
+                "metadata_keys": self.metadata_keys,
            },
        )
 
@@ -108,11 +116,30 @@ class S3KeyTrigger(BaseTrigger):
                        client, self.bucket_name, self.bucket_key, self.wildcard_match, self.use_regex
                    ):
                        if self.should_check_fn:
-                            s3_objects = await self.hook.get_files_async(
+                            raw_objects = await self.hook.get_files_async(
                                client, self.bucket_name, self.bucket_key, self.wildcard_match
                            )
+                            files = []
+                            for f in raw_objects:
+                                metadata = {}
+                                obj = await self.hook.get_head_object_async(
+                                    client=client, key=f, bucket_name=self.bucket_name
+                                )
+                                if obj is None:
+                                    return
+
+                                if "*" in self.metadata_keys:
+                                    metadata = obj
+                                else:
+                                    for mk in self.metadata_keys:
+                                        if mk == "Size":
+                                            metadata[mk] = obj.get("ContentLength")
+                                        else:
+                                            metadata[mk] = obj.get(mk, None)
+                                metadata["Key"] = f
+                                files.append(metadata)
                            await asyncio.sleep(self.poke_interval)
-                            yield TriggerEvent({"status": "running", "files": s3_objects})
+                            yield TriggerEvent({"status": "running", "files": files})
                        else:
                            yield TriggerEvent({"status": "success"})
                            return
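A hedged sketch of how the extended trigger might be constructed and how a check_fn could consume the per-key metadata it now emits. The bucket, key pattern, and connection id are placeholders, and constructor arguments not visible in this diff (such as should_check_fn) are assumed from the existing trigger.

from airflow.providers.amazon.aws.triggers.s3 import S3KeyTrigger

trigger = S3KeyTrigger(
    bucket_name="example-bucket",            # placeholder
    bucket_key="incoming/*.csv",             # placeholder wildcard key
    wildcard_match=True,
    aws_conn_id="aws_default",
    should_check_fn=True,                    # assumed existing flag; makes run() emit per-key metadata
    metadata_keys=["Size", "LastModified"],
)

def check_fn(files: list[dict]) -> bool:
    # Each dict carries the requested head_object attributes plus "Key";
    # "Size" is mapped from ContentLength, and missing attributes come back as None.
    return all(f.get("Size") not in (None, 0) for f in files)

In practice a check_fn like this is normally supplied to the S3 key sensor that defers to this trigger rather than to the trigger itself.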
airflow/providers/amazon/aws/triggers/sagemaker.py
@@ -26,9 +26,9 @@ from typing import Any
 
 from botocore.exceptions import WaiterError
 
-from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.sagemaker import SageMakerHook
 from airflow.providers.amazon.aws.utils.waiter_with_logging import async_wait
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 
 
airflow/providers/amazon/aws/triggers/sqs.py
@@ -20,10 +20,10 @@ import asyncio
 from collections.abc import AsyncIterator, Collection
 from typing import TYPE_CHECKING, Any
 
-from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.sqs import SqsHook
 from airflow.providers.amazon.aws.utils.sqs import process_response
 from airflow.providers.amazon.version_compat import AIRFLOW_V_3_0_PLUS
+from airflow.providers.common.compat.sdk import AirflowException
 
 if AIRFLOW_V_3_0_PLUS:
     from airflow.triggers.base import BaseEventTrigger, TriggerEvent
airflow/providers/amazon/aws/triggers/ssm.py
@@ -36,6 +36,11 @@ class SsmRunCommandTrigger(AwsBaseWaiterTrigger):
    :param waiter_delay: The amount of time in seconds to wait between attempts. (default: 120)
    :param waiter_max_attempts: The maximum number of attempts to be made. (default: 75)
    :param aws_conn_id: The Airflow connection used for AWS credentials.
+   :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+   :param verify: Whether or not to verify SSL certificates. See:
+       https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+   :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+       https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
    """
 
    def __init__(
@@ -45,6 +50,9 @@ class SsmRunCommandTrigger(AwsBaseWaiterTrigger):
        waiter_delay: int = 120,
        waiter_max_attempts: int = 75,
        aws_conn_id: str | None = None,
+        region_name: str | None = None,
+        verify: bool | str | None = None,
+        botocore_config: dict | None = None,
    ) -> None:
        super().__init__(
            serialized_fields={"command_id": command_id},
@@ -58,11 +66,19 @@ class SsmRunCommandTrigger(AwsBaseWaiterTrigger):
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
+            region_name=region_name,
+            verify=verify,
+            botocore_config=botocore_config,
        )
        self.command_id = command_id
 
    def hook(self) -> AwsGenericHook:
-        return SsmHook(aws_conn_id=self.aws_conn_id)
+        return SsmHook(
+            aws_conn_id=self.aws_conn_id,
+            region_name=self.region_name,
+            verify=self.verify,
+            config=self.botocore_config,
+        )
 
    async def run(self) -> AsyncIterator[TriggerEvent]:
        hook = self.hook()
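With the three pass-through parameters added above, the trigger and the SsmHook it builds can target a non-default region and a tuned botocore client. A minimal sketch; the command id, region, and configuration values are placeholders.

from airflow.providers.amazon.aws.triggers.ssm import SsmRunCommandTrigger

trigger = SsmRunCommandTrigger(
    command_id="11111111-2222-3333-4444-555555555555",  # placeholder SSM command id
    aws_conn_id="aws_default",
    region_name="eu-west-1",
    verify=True,
    botocore_config={"retries": {"max_attempts": 10, "mode": "adaptive"}},
    waiter_delay=60,
    waiter_max_attempts=30,
)

The same three keyword arguments are forwarded to SsmHook in hook() above, so the deferred poll uses the same region and client configuration as the rest of the task.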
airflow/providers/amazon/aws/utils/__init__.py
@@ -23,7 +23,7 @@ from enum import Enum
 from importlib import metadata
 from typing import Any
 
-from airflow.exceptions import AirflowException
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.utils.helpers import prune_dict
 from airflow.version import version
 
airflow/providers/amazon/aws/utils/connection_wrapper.py
@@ -26,16 +26,13 @@ from typing import TYPE_CHECKING, Any
 from botocore import UNSIGNED
 from botocore.config import Config
 
-from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.utils import trim_none_values
+from airflow.providers.amazon.version_compat import NOTSET, ArgNotSet
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.utils.log.logging_mixin import LoggingMixin
-from airflow.utils.types import NOTSET, ArgNotSet
 
 if TYPE_CHECKING:
-    try:
-        from airflow.sdk import Connection
-    except ImportError:
-        from airflow.models.connection import Connection  # type: ignore[assignment]
+    from airflow.providers.common.compat.sdk import Connection
 
 
 @dataclass
airflow/providers/amazon/aws/utils/mixins.py
@@ -158,4 +158,4 @@ def aws_template_fields(*template_fields: str) -> tuple[str, ...]:
            f"{', '.join(map(repr, template_fields))}."
        )
        raise TypeError(msg)
-    return tuple(sorted(list({"aws_conn_id", "region_name", "verify"} | set(template_fields))))
+    return tuple(sorted({"aws_conn_id", "region_name", "verify"} | set(template_fields)))
airflow/providers/amazon/aws/utils/waiter.py
@@ -22,7 +22,7 @@ import time
 from collections.abc import Callable
 from enum import Enum
 
-from airflow.exceptions import AirflowException
+from airflow.providers.common.compat.sdk import AirflowException
 
 log = logging.getLogger(__name__)
 
@@ -51,8 +51,8 @@ def waiter(
    :param desired_state: Wait until the getter returns this value
    :param failure_states: A set of states which indicate failure and should throw an
        exception if any are reached before the desired_state
-   :param object_type: Used for the reporting string. What are you waiting for? (application, job, etc)
-   :param action: Used for the reporting string. What action are you waiting for? (created, deleted, etc)
+   :param object_type: Used for the reporting string. What are you waiting for? (application, job, etc.)
+   :param action: Used for the reporting string. What action are you waiting for? (created, deleted, etc.)
    :param countdown: Number of seconds the waiter should wait for the desired state before timing out.
        Defaults to 25 * 60 seconds. None = infinite.
    :param check_interval_seconds: Number of seconds waiter should wait before attempting
airflow/providers/amazon/aws/utils/waiter_with_logging.py
@@ -25,7 +25,7 @@ from typing import TYPE_CHECKING, Any
 import jmespath
 from botocore.exceptions import NoCredentialsError, WaiterError
 
-from airflow.exceptions import AirflowException
+from airflow.providers.common.compat.sdk import AirflowException
 
 if TYPE_CHECKING:
     from botocore.waiter import Waiter