apache-airflow-providers-amazon 8.25.0rc1__py3-none-any.whl → 8.26.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. airflow/providers/amazon/__init__.py +1 -1
  2. airflow/providers/amazon/aws/hooks/athena.py +18 -9
  3. airflow/providers/amazon/aws/hooks/athena_sql.py +2 -1
  4. airflow/providers/amazon/aws/hooks/base_aws.py +34 -10
  5. airflow/providers/amazon/aws/hooks/chime.py +2 -1
  6. airflow/providers/amazon/aws/hooks/datasync.py +6 -3
  7. airflow/providers/amazon/aws/hooks/ecr.py +2 -1
  8. airflow/providers/amazon/aws/hooks/ecs.py +12 -6
  9. airflow/providers/amazon/aws/hooks/glacier.py +8 -4
  10. airflow/providers/amazon/aws/hooks/kinesis.py +2 -1
  11. airflow/providers/amazon/aws/hooks/logs.py +4 -2
  12. airflow/providers/amazon/aws/hooks/redshift_cluster.py +24 -12
  13. airflow/providers/amazon/aws/hooks/redshift_data.py +4 -2
  14. airflow/providers/amazon/aws/hooks/redshift_sql.py +6 -3
  15. airflow/providers/amazon/aws/hooks/s3.py +70 -53
  16. airflow/providers/amazon/aws/hooks/sagemaker.py +82 -41
  17. airflow/providers/amazon/aws/hooks/secrets_manager.py +6 -3
  18. airflow/providers/amazon/aws/hooks/sts.py +2 -1
  19. airflow/providers/amazon/aws/operators/athena.py +21 -8
  20. airflow/providers/amazon/aws/operators/batch.py +12 -6
  21. airflow/providers/amazon/aws/operators/datasync.py +2 -1
  22. airflow/providers/amazon/aws/operators/ecs.py +1 -0
  23. airflow/providers/amazon/aws/operators/emr.py +6 -86
  24. airflow/providers/amazon/aws/operators/glue.py +4 -2
  25. airflow/providers/amazon/aws/operators/glue_crawler.py +22 -19
  26. airflow/providers/amazon/aws/operators/neptune.py +2 -1
  27. airflow/providers/amazon/aws/operators/redshift_cluster.py +2 -1
  28. airflow/providers/amazon/aws/operators/sagemaker.py +2 -1
  29. airflow/providers/amazon/aws/sensors/base_aws.py +2 -1
  30. airflow/providers/amazon/aws/sensors/glue_catalog_partition.py +25 -17
  31. airflow/providers/amazon/aws/sensors/glue_crawler.py +16 -12
  32. airflow/providers/amazon/aws/transfers/mongo_to_s3.py +6 -3
  33. airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py +2 -1
  34. airflow/providers/amazon/aws/transfers/s3_to_sql.py +2 -1
  35. airflow/providers/amazon/aws/triggers/ecs.py +3 -1
  36. airflow/providers/amazon/aws/triggers/glue.py +15 -3
  37. airflow/providers/amazon/aws/triggers/glue_crawler.py +8 -1
  38. airflow/providers/amazon/aws/utils/connection_wrapper.py +10 -5
  39. airflow/providers/amazon/aws/utils/mixins.py +2 -1
  40. airflow/providers/amazon/aws/utils/redshift.py +2 -1
  41. airflow/providers/amazon/get_provider_info.py +2 -1
  42. {apache_airflow_providers_amazon-8.25.0rc1.dist-info → apache_airflow_providers_amazon-8.26.0rc1.dist-info}/METADATA +6 -6
  43. {apache_airflow_providers_amazon-8.25.0rc1.dist-info → apache_airflow_providers_amazon-8.26.0rc1.dist-info}/RECORD +45 -45
  44. {apache_airflow_providers_amazon-8.25.0rc1.dist-info → apache_airflow_providers_amazon-8.26.0rc1.dist-info}/WHEEL +0 -0
  45. {apache_airflow_providers_amazon-8.25.0rc1.dist-info → apache_airflow_providers_amazon-8.26.0rc1.dist-info}/entry_points.txt +0 -0
@@ -175,9 +175,6 @@ class AthenaOperator(AwsBaseOperator[AthenaHook]):
  f"query_execution_id is {self.query_execution_id}."
  )

- # Save output location from API response for later use in OpenLineage.
- self.output_location = self.hook.get_output_location(self.query_execution_id)
-
  return self.query_execution_id

  def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
@@ -185,6 +182,9 @@ class AthenaOperator(AwsBaseOperator[AthenaHook]):

  if event["status"] != "success":
  raise AirflowException(f"Error while waiting for operation on cluster to complete: {event}")
+
+ # Save query_execution_id to be later used by listeners
+ self.query_execution_id = event["value"]
  return event["value"]

  def on_kill(self) -> None:
@@ -208,13 +208,21 @@ class AthenaOperator(AwsBaseOperator[AthenaHook]):
  )
  self.hook.poll_query_status(self.query_execution_id, sleep_time=self.sleep_time)

- def get_openlineage_facets_on_start(self) -> OperatorLineage:
- """Retrieve OpenLineage data by parsing SQL queries and enriching them with Athena API.
+ def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
+ """
+ Retrieve OpenLineage data by parsing SQL queries and enriching them with Athena API.

  In addition to CTAS query, query and calculation results are stored in S3 location.
- For that reason additional output is attached with this location.
+ For that reason additional output is attached with this location. Instead of using the complete
+ path where the results are saved (user's prefix + some UUID), we are creating a dataset with the
+ user-provided path only. This should make it easier to match this dataset across different processes.
  """
- from openlineage.client.facet import ExtractionError, ExtractionErrorRunFacet, SqlJobFacet
+ from openlineage.client.facet import (
+ ExternalQueryRunFacet,
+ ExtractionError,
+ ExtractionErrorRunFacet,
+ SqlJobFacet,
+ )
  from openlineage.client.run import Dataset

  from airflow.providers.openlineage.extractors.base import OperatorLineage
@@ -264,6 +272,11 @@ class AthenaOperator(AwsBaseOperator[AthenaHook]):
  )
  )

+ if self.query_execution_id:
+ run_facets["externalQuery"] = ExternalQueryRunFacet(
+ externalQueryId=self.query_execution_id, source="awsathena"
+ )
+
  if self.output_location:
  parsed = urlparse(self.output_location)
  outputs.append(Dataset(namespace=f"{parsed.scheme}://{parsed.netloc}", name=parsed.path or "/"))
@@ -300,7 +313,7 @@ class AthenaOperator(AwsBaseOperator[AthenaHook]):
  )
  }
  fields = [
- SchemaField(name=column["Name"], type=column["Type"], description=column["Comment"])
+ SchemaField(name=column["Name"], type=column["Type"], description=column.get("Comment"))
  for column in table_metadata["TableMetadata"]["Columns"]
  ]
  if fields:
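
Taken together, the athena.py hunks move OpenLineage collection from get_openlineage_facets_on_start to get_openlineage_facets_on_complete and record the query_execution_id so it can be attached as an ExternalQueryRunFacet. A minimal sketch of the facet construction shown above; the helper name is hypothetical, while the facet class, its arguments and the "awsathena" source value come straight from the hunk:

    from openlineage.client.facet import ExternalQueryRunFacet

    def build_athena_run_facets(query_execution_id: str | None) -> dict:
        """Hypothetical helper: build the run facets the operator now emits on completion."""
        run_facets: dict = {}
        if query_execution_id:
            # Same facet and arguments as in the hunk above.
            run_facets["externalQuery"] = ExternalQueryRunFacet(
                externalQueryId=query_execution_id, source="awsathena"
            )
        return run_facets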
@@ -14,7 +14,8 @@
  # KIND, either express or implied. See the License for the
  # specific language governing permissions and limitations
  # under the License.
- """AWS Batch services.
+ """
+ AWS Batch services.

  .. seealso::

@@ -54,7 +55,8 @@ if TYPE_CHECKING:


  class BatchOperator(BaseOperator):
- """Execute a job on AWS Batch.
+ """
+ Execute a job on AWS Batch.

  .. seealso::
  For more information on how to use this operator, take a look at the guide:
@@ -236,7 +238,8 @@ class BatchOperator(BaseOperator):
  )

  def execute(self, context: Context) -> str | None:
- """Submit and monitor an AWS Batch job.
+ """
+ Submit and monitor an AWS Batch job.

  :raises: AirflowException
  """
@@ -287,7 +290,8 @@ class BatchOperator(BaseOperator):
  self.log.info("AWS Batch job (%s) terminated: %s", self.job_id, response)

  def submit_job(self, context: Context):
- """Submit an AWS Batch job.
+ """
+ Submit an AWS Batch job.

  :raises: AirflowException
  """
@@ -342,7 +346,8 @@ class BatchOperator(BaseOperator):
  )

  def monitor_job(self, context: Context):
- """Monitor an AWS Batch job.
+ """
+ Monitor an AWS Batch job.

  This can raise an exception or an AirflowTaskTimeout if the task was
  created with ``execution_timeout``.
@@ -434,7 +439,8 @@ class BatchOperator(BaseOperator):


  class BatchCreateComputeEnvironmentOperator(BaseOperator):
- """Create an AWS Batch compute environment.
+ """
+ Create an AWS Batch compute environment.

  .. seealso::
  For more information on how to use this operator, take a look at the guide:
@@ -34,7 +34,8 @@ if TYPE_CHECKING:


  class DataSyncOperator(AwsBaseOperator[DataSyncHook]):
- """Find, Create, Update, Execute and Delete AWS DataSync Tasks.
+ """
+ Find, Create, Update, Execute and Delete AWS DataSync Tasks.

  If ``do_xcom_push`` is True, then the DataSync TaskArn and TaskExecutionArn
  which were executed will be pushed to an XCom.
@@ -586,6 +586,7 @@ class EcsRunTaskOperator(EcsBaseOperator):
  if event["status"] != "success":
  raise AirflowException(f"Error in task execution: {event}")
  self.arn = event["task_arn"] # restore arn to its updated value, needed for next steps
+ self.cluster = event["cluster"]
  self._after_execution()
  if self._aws_logs_enabled():
  # same behavior as non-deferrable mode, return last line of logs of the task.
@@ -27,7 +27,6 @@ from uuid import uuid4
  from airflow.configuration import conf
  from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
  from airflow.models import BaseOperator
- from airflow.models.mappedoperator import MappedOperator
  from airflow.providers.amazon.aws.hooks.emr import EmrContainerHook, EmrHook, EmrServerlessHook
  from airflow.providers.amazon.aws.links.emr import (
  EmrClusterLink,
@@ -1259,91 +1258,12 @@ class EmrServerlessStartJobOperator(BaseOperator):
  "configuration_overrides": "json",
  }

- @property
- def operator_extra_links(self):
- """
- Dynamically add extra links depending on the job type and if they're enabled.
-
- If S3 or CloudWatch monitoring configurations exist, add links directly to the relevant consoles.
- Only add dashboard links if they're explicitly enabled. These are one-time links that any user
- can access, but expire on first click or one hour, whichever comes first.
- """
- op_extra_links = []
-
- if isinstance(self, MappedOperator):
- operator_class = self.operator_class
- enable_application_ui_links = self.partial_kwargs.get(
- "enable_application_ui_links"
- ) or self.expand_input.value.get("enable_application_ui_links")
- job_driver = self.partial_kwargs.get("job_driver", {}) or self.expand_input.value.get(
- "job_driver", {}
- )
- configuration_overrides = self.partial_kwargs.get(
- "configuration_overrides"
- ) or self.expand_input.value.get("configuration_overrides")
-
- # Configuration overrides can either be a list or a dictionary, depending on whether it's passed in as partial or expand.
- if isinstance(configuration_overrides, list):
- if any(
- [
- operator_class.is_monitoring_in_job_override(
- self=operator_class,
- config_key="s3MonitoringConfiguration",
- job_override=job_override,
- )
- for job_override in configuration_overrides
- ]
- ):
- op_extra_links.extend([EmrServerlessS3LogsLink()])
- if any(
- [
- operator_class.is_monitoring_in_job_override(
- self=operator_class,
- config_key="cloudWatchLoggingConfiguration",
- job_override=job_override,
- )
- for job_override in configuration_overrides
- ]
- ):
- op_extra_links.extend([EmrServerlessCloudWatchLogsLink()])
- else:
- if operator_class.is_monitoring_in_job_override(
- self=operator_class,
- config_key="s3MonitoringConfiguration",
- job_override=configuration_overrides,
- ):
- op_extra_links.extend([EmrServerlessS3LogsLink()])
- if operator_class.is_monitoring_in_job_override(
- self=operator_class,
- config_key="cloudWatchLoggingConfiguration",
- job_override=configuration_overrides,
- ):
- op_extra_links.extend([EmrServerlessCloudWatchLogsLink()])
-
- else:
- operator_class = self
- enable_application_ui_links = self.enable_application_ui_links
- configuration_overrides = self.configuration_overrides
- job_driver = self.job_driver
-
- if operator_class.is_monitoring_in_job_override(
- "s3MonitoringConfiguration", configuration_overrides
- ):
- op_extra_links.extend([EmrServerlessS3LogsLink()])
- if operator_class.is_monitoring_in_job_override(
- "cloudWatchLoggingConfiguration", configuration_overrides
- ):
- op_extra_links.extend([EmrServerlessCloudWatchLogsLink()])
-
- if enable_application_ui_links:
- op_extra_links.extend([EmrServerlessDashboardLink()])
- if isinstance(job_driver, list):
- if any("sparkSubmit" in ind_job_driver for ind_job_driver in job_driver):
- op_extra_links.extend([EmrServerlessLogsLink()])
- elif "sparkSubmit" in job_driver:
- op_extra_links.extend([EmrServerlessLogsLink()])
-
- return tuple(op_extra_links)
+ operator_extra_links = (
+ EmrServerlessS3LogsLink(),
+ EmrServerlessCloudWatchLogsLink(),
+ EmrServerlessDashboardLink(),
+ EmrServerlessLogsLink(),
+ )

  def __init__(
  self,
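
The hunk above replaces the dynamic operator_extra_links property, which inspected MappedOperator partial/expand kwargs, with a static tuple of the four link classes. For context, a hedged usage sketch of EmrServerlessStartJobOperator; the parameter names mirror the attributes referenced in the removed code (job_driver, configuration_overrides, enable_application_ui_links), and every value is a placeholder:

    from airflow.providers.amazon.aws.operators.emr import EmrServerlessStartJobOperator

    start_job = EmrServerlessStartJobOperator(
        task_id="start_emr_serverless_job",
        application_id="00example0000000",  # placeholder EMR Serverless application id
        execution_role_arn="arn:aws:iam::111122223333:role/example-emr-serverless-role",
        job_driver={"sparkSubmit": {"entryPoint": "s3://example-bucket/app.py"}},
        configuration_overrides={
            "monitoringConfiguration": {
                "s3MonitoringConfiguration": {"logUri": "s3://example-bucket/logs/"}
            }
        },
        enable_application_ui_links=True,  # dashboard links are one-time and expire quickly
    )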
@@ -43,7 +43,8 @@ if TYPE_CHECKING:


  class GlueJobOperator(BaseOperator):
- """Create an AWS Glue Job.
+ """
+ Create an AWS Glue Job.

  AWS Glue is a serverless Spark ETL service for running Spark Jobs on the AWS
  cloud. Language support: Python and Scala.
@@ -179,7 +180,8 @@ class GlueJobOperator(BaseOperator):
  )

  def execute(self, context: Context):
- """Execute AWS Glue Job from Airflow.
+ """
+ Execute AWS Glue Job from Airflow.

  :return: the current Glue job ID.
  """
@@ -17,22 +17,22 @@
  # under the License.
  from __future__ import annotations

- from functools import cached_property
  from typing import TYPE_CHECKING, Any, Sequence

  from airflow.configuration import conf
  from airflow.exceptions import AirflowException
+ from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
  from airflow.providers.amazon.aws.triggers.glue_crawler import GlueCrawlerCompleteTrigger
  from airflow.providers.amazon.aws.utils import validate_execute_complete_event
+ from airflow.providers.amazon.aws.utils.mixins import aws_template_fields

  if TYPE_CHECKING:
  from airflow.utils.context import Context

- from airflow.models import BaseOperator
  from airflow.providers.amazon.aws.hooks.glue_crawler import GlueCrawlerHook


- class GlueCrawlerOperator(BaseOperator):
+ class GlueCrawlerOperator(AwsBaseOperator[GlueCrawlerHook]):
  """
  Creates, updates and triggers an AWS Glue Crawler.

@@ -45,45 +45,45 @@ class GlueCrawlerOperator(BaseOperator):
  :ref:`howto/operator:GlueCrawlerOperator`

  :param config: Configurations for the AWS Glue crawler
- :param aws_conn_id: The Airflow connection used for AWS credentials.
- If this is None or empty then the default boto3 behaviour is used. If
- running Airflow in a distributed manner and aws_conn_id is None or
- empty, then default boto3 configuration would be used (and must be
- maintained on each worker node).
  :param poll_interval: Time (in seconds) to wait between two consecutive calls to check crawler status
  :param wait_for_completion: Whether to wait for crawl execution completion. (default: True)
  :param deferrable: If True, the operator will wait asynchronously for the crawl to complete.
  This implies waiting for completion. This mode requires aiobotocore module to be installed.
  (default: False)
+ :param aws_conn_id: The Airflow connection used for AWS credentials.
+ If this is ``None`` or empty then the default boto3 behaviour is used. If
+ running Airflow in a distributed manner and aws_conn_id is None or
+ empty, then default boto3 configuration would be used (and must be
+ maintained on each worker node).
+ :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+ :param verify: Whether or not to verify SSL certificates. See:
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+ :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+ https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
  """

- template_fields: Sequence[str] = ("config",)
+ aws_hook_class = GlueCrawlerHook
+
+ template_fields: Sequence[str] = aws_template_fields(
+ "config",
+ )
  ui_color = "#ededed"

  def __init__(
  self,
  config,
- aws_conn_id="aws_default",
- region_name: str | None = None,
  poll_interval: int = 5,
  wait_for_completion: bool = True,
  deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
  **kwargs,
  ):
  super().__init__(**kwargs)
- self.aws_conn_id = aws_conn_id
  self.poll_interval = poll_interval
  self.wait_for_completion = wait_for_completion
  self.deferrable = deferrable
- self.region_name = region_name
  self.config = config

- @cached_property
- def hook(self) -> GlueCrawlerHook:
- """Create and return a GlueCrawlerHook."""
- return GlueCrawlerHook(self.aws_conn_id, region_name=self.region_name)
-
- def execute(self, context: Context):
+ def execute(self, context: Context) -> str:
  """
  Execute AWS Glue Crawler from Airflow.

@@ -103,6 +103,9 @@ class GlueCrawlerOperator(BaseOperator):
  crawler_name=crawler_name,
  waiter_delay=self.poll_interval,
  aws_conn_id=self.aws_conn_id,
+ region_name=self.region_name,
+ verify=self.verify,
+ botocore_config=self.botocore_config,
  ),
  method_name="execute_complete",
  )
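
With GlueCrawlerOperator now derived from AwsBaseOperator[GlueCrawlerHook], the connection-related arguments (aws_conn_id, region_name, verify, botocore_config) come from the shared AWS base class instead of a bespoke constructor and cached hook, and they are forwarded to GlueCrawlerCompleteTrigger when deferring. A hedged usage sketch; the crawler config follows the AWS Glue CreateCrawler API shape and every value is a placeholder:

    from airflow.providers.amazon.aws.operators.glue_crawler import GlueCrawlerOperator

    crawl = GlueCrawlerOperator(
        task_id="run_glue_crawler",
        config={
            "Name": "example-crawler",
            "Role": "arn:aws:iam::111122223333:role/example-glue-role",
            "DatabaseName": "example_db",
            "Targets": {"S3Targets": [{"Path": "s3://example-bucket/data/"}]},
        },
        # These now come from AwsBaseOperator instead of custom constructor arguments:
        aws_conn_id="aws_default",
        region_name="eu-west-1",
        wait_for_completion=True,
        deferrable=False,
    )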
@@ -81,7 +81,8 @@ def handle_waitable_exception(


  class NeptuneStartDbClusterOperator(AwsBaseOperator[NeptuneHook]):
- """Starts an Amazon Neptune DB cluster.
+ """
+ Starts an Amazon Neptune DB cluster.

  Amazon Neptune Database is a serverless graph database designed for superior scalability
  and availability. Neptune Database provides built-in security, continuous backups, and
@@ -38,7 +38,8 @@ if TYPE_CHECKING:


  class RedshiftCreateClusterOperator(BaseOperator):
- """Creates a new cluster with the specified parameters.
+ """
+ Creates a new cluster with the specified parameters.

  .. seealso::
  For more information on how to use this operator, take a look at the guide:
@@ -60,7 +60,8 @@ def serialize(result: dict) -> dict:


  class SageMakerBaseOperator(BaseOperator):
- """This is the base operator for all SageMaker operators.
+ """
+ This is the base operator for all SageMaker operators.

  :param config: The configuration necessary to start a training job (templated)
  """
@@ -30,7 +30,8 @@ from airflow.utils.types import NOTSET, ArgNotSet


  class AwsBaseSensor(BaseSensorOperator, AwsBaseHookMixin[AwsHookType]):
- """Base AWS (Amazon) Sensor Class for build sensors in top of AWS Hooks.
+ """
+ Base AWS (Amazon) Sensor Class for build sensors in top of AWS Hooks.

  .. warning::
  Only for internal usage, this class might be changed, renamed or removed in the future
@@ -18,7 +18,6 @@
  from __future__ import annotations

  from datetime import timedelta
- from functools import cached_property
  from typing import TYPE_CHECKING, Any, Sequence

  from deprecated import deprecated
@@ -26,18 +25,23 @@ from deprecated import deprecated
  from airflow.configuration import conf
  from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
  from airflow.providers.amazon.aws.hooks.glue_catalog import GlueCatalogHook
+ from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
  from airflow.providers.amazon.aws.triggers.glue import GlueCatalogPartitionTrigger
  from airflow.providers.amazon.aws.utils import validate_execute_complete_event
- from airflow.sensors.base import BaseSensorOperator
+ from airflow.providers.amazon.aws.utils.mixins import aws_template_fields

  if TYPE_CHECKING:
  from airflow.utils.context import Context


- class GlueCatalogPartitionSensor(BaseSensorOperator):
+ class GlueCatalogPartitionSensor(AwsBaseSensor[GlueCatalogHook]):
  """
  Waits for a partition to show up in AWS Glue Catalog.

+ .. seealso::
+ For more information on how to use this sensor, take a look at the guide:
+ :ref:`howto/sensor:GlueCatalogPartitionSensor`
+
  :param table_name: The name of the table to wait for, supports the dot
  notation (my_database.my_table)
  :param expression: The partition clause to wait for. This is passed as
@@ -46,19 +50,27 @@ class GlueCatalogPartitionSensor(BaseSensorOperator):
  AND type='value'`` and comparison operators as in ``"ds>=2015-01-01"``.
  See https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html
  #aws-glue-api-catalog-partitions-GetPartitions
- :param aws_conn_id: ID of the Airflow connection where
- credentials and extra configuration are stored
- :param region_name: Optional aws region name (example: us-east-1). Uses region from connection
- if not specified.
  :param database_name: The name of the catalog database where the partitions reside.
  :param poke_interval: Time in seconds that the job should wait in
  between each tries
  :param deferrable: If true, then the sensor will wait asynchronously for the partition to
  show up in the AWS Glue Catalog.
  (default: False, but can be overridden in config file by setting default_deferrable to True)
+ :param aws_conn_id: The Airflow connection used for AWS credentials.
+ If this is ``None`` or empty then the default boto3 behaviour is used. If
+ running Airflow in a distributed manner and aws_conn_id is None or
+ empty, then default boto3 configuration would be used (and must be
+ maintained on each worker node).
+ :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+ :param verify: Whether or not to verify SSL certificates. See:
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+ :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+ https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
  """

- template_fields: Sequence[str] = (
+ aws_hook_class = GlueCatalogHook
+
+ template_fields: Sequence[str] = aws_template_fields(
  "database_name",
  "table_name",
  "expression",
@@ -70,19 +82,16 @@ class GlueCatalogPartitionSensor(BaseSensorOperator):
  *,
  table_name: str,
  expression: str = "ds='{{ ds }}'",
- aws_conn_id: str | None = "aws_default",
- region_name: str | None = None,
  database_name: str = "default",
  poke_interval: int = 60 * 3,
  deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
  **kwargs,
  ):
- super().__init__(poke_interval=poke_interval, **kwargs)
- self.aws_conn_id = aws_conn_id
- self.region_name = region_name
+ super().__init__(**kwargs)
  self.table_name = table_name
  self.expression = expression
  self.database_name = database_name
+ self.poke_interval = poke_interval
  self.deferrable = deferrable

  def execute(self, context: Context) -> Any:
@@ -93,7 +102,10 @@ class GlueCatalogPartitionSensor(BaseSensorOperator):
  table_name=self.table_name,
  expression=self.expression,
  aws_conn_id=self.aws_conn_id,
+ region_name=self.region_name,
  waiter_delay=int(self.poke_interval),
+ verify=self.verify,
+ botocore_config=self.botocore_config,
  ),
  method_name="execute_complete",
  timeout=timedelta(seconds=self.timeout),
@@ -126,7 +138,3 @@ class GlueCatalogPartitionSensor(BaseSensorOperator):
  def get_hook(self) -> GlueCatalogHook:
  """Get the GlueCatalogHook."""
  return self.hook
-
- @cached_property
- def hook(self) -> GlueCatalogHook:
- return GlueCatalogHook(aws_conn_id=self.aws_conn_id, region_name=self.region_name)
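
The sensor follows the same pattern: it now subclasses AwsBaseSensor[GlueCatalogHook], drops its own aws_conn_id/region_name plumbing, and forwards region_name, verify and botocore_config to GlueCatalogPartitionTrigger when deferred. A hedged usage sketch, with placeholder database, table and region values; the parameter names all appear in the hunks above:

    from airflow.providers.amazon.aws.sensors.glue_catalog_partition import GlueCatalogPartitionSensor

    wait_for_partition = GlueCatalogPartitionSensor(
        task_id="wait_for_partition",
        database_name="example_db",
        table_name="example_table",
        expression="ds='{{ ds }}'",   # default shown in the diff
        poke_interval=60 * 3,
        deferrable=True,              # defers to GlueCatalogPartitionTrigger
        aws_conn_id="aws_default",
        region_name="eu-west-1",
    )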
@@ -17,20 +17,20 @@
  # under the License.
  from __future__ import annotations

- from functools import cached_property
  from typing import TYPE_CHECKING, Sequence

  from deprecated import deprecated

  from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException
  from airflow.providers.amazon.aws.hooks.glue_crawler import GlueCrawlerHook
- from airflow.sensors.base import BaseSensorOperator
+ from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
+ from airflow.providers.amazon.aws.utils.mixins import aws_template_fields

  if TYPE_CHECKING:
  from airflow.utils.context import Context


- class GlueCrawlerSensor(BaseSensorOperator):
+ class GlueCrawlerSensor(AwsBaseSensor[GlueCrawlerHook]):
  """
  Waits for an AWS Glue crawler to reach any of the statuses below.

@@ -41,19 +41,27 @@ class GlueCrawlerSensor(BaseSensorOperator):
  :ref:`howto/sensor:GlueCrawlerSensor`

  :param crawler_name: The AWS Glue crawler unique name
- :param aws_conn_id: aws connection to use, defaults to 'aws_default'
- If this is None or empty then the default boto3 behaviour is used. If
+ :param aws_conn_id: The Airflow connection used for AWS credentials.
+ If this is ``None`` or empty then the default boto3 behaviour is used. If
  running Airflow in a distributed manner and aws_conn_id is None or
  empty, then default boto3 configuration would be used (and must be
  maintained on each worker node).
+ :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+ :param verify: Whether or not to verify SSL certificates. See:
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+ :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+ https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
  """

- template_fields: Sequence[str] = ("crawler_name",)
+ aws_hook_class = GlueCrawlerHook

- def __init__(self, *, crawler_name: str, aws_conn_id: str | None = "aws_default", **kwargs) -> None:
+ template_fields: Sequence[str] = aws_template_fields(
+ "crawler_name",
+ )
+
+ def __init__(self, *, crawler_name: str, **kwargs) -> None:
  super().__init__(**kwargs)
  self.crawler_name = crawler_name
- self.aws_conn_id = aws_conn_id
  self.success_statuses = "SUCCEEDED"
  self.errored_statuses = ("FAILED", "CANCELLED")

@@ -79,7 +87,3 @@ class GlueCrawlerSensor(BaseSensorOperator):
  def get_hook(self) -> GlueCrawlerHook:
  """Return a new or pre-existing GlueCrawlerHook."""
  return self.hook
-
- @cached_property
- def hook(self) -> GlueCrawlerHook:
- return GlueCrawlerHook(aws_conn_id=self.aws_conn_id)
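
Likewise for GlueCrawlerSensor, which now inherits credentials handling from AwsBaseSensor[GlueCrawlerHook]. A hedged usage sketch with a placeholder crawler name and region; the keyword arguments are either documented in the hunk above or standard sensor options:

    from airflow.providers.amazon.aws.sensors.glue_crawler import GlueCrawlerSensor

    wait_for_crawler = GlueCrawlerSensor(
        task_id="wait_for_crawler",
        crawler_name="example-crawler",
        aws_conn_id="aws_default",   # now handled by AwsBaseSensor
        region_name="eu-west-1",     # newly available via the common base class
        poke_interval=120,
    )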
@@ -34,7 +34,8 @@ if TYPE_CHECKING:


  class MongoToS3Operator(BaseOperator):
- """Move data from MongoDB to S3.
+ """
+ Move data from MongoDB to S3.

  .. seealso::
  For more information on how to use this operator, take a look at the guide:
@@ -128,7 +129,8 @@ class MongoToS3Operator(BaseOperator):

  @staticmethod
  def _stringify(iterable: Iterable, joinable: str = "\n") -> str:
- """Stringify an iterable of dicts.
+ """
+ Stringify an iterable of dicts.

  This dumps each dict with JSON, and joins them with ``joinable``.
  """
@@ -136,7 +138,8 @@ class MongoToS3Operator(BaseOperator):

  @staticmethod
  def transform(docs: Any) -> Any:
- """Transform the data for transfer.
+ """
+ Transform the data for transfer.

  This method is meant to be extended by child classes to perform
  transformations unique to those operators needs. Processes pyMongo
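
Since the transform docstring above names subclassing as the extension point, here is a hedged sketch of a child operator overriding it; the subclass name and the _id-stripping logic are illustrative only, while the staticmethod signature matches the hunk:

    from typing import Any

    from airflow.providers.amazon.aws.transfers.mongo_to_s3 import MongoToS3Operator

    class TrimmedMongoToS3Operator(MongoToS3Operator):
        @staticmethod
        def transform(docs: Any) -> Any:
            # Drop MongoDB's internal _id before the parent operator stringifies
            # the documents and uploads them to S3.
            return [{k: v for k, v in doc.items() if k != "_id"} for doc in docs]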
@@ -44,7 +44,8 @@ class KeySchema(TypedDict):


  class S3ToDynamoDBOperator(BaseOperator):
- """Load Data from S3 into a DynamoDB.
+ """
+ Load Data from S3 into a DynamoDB.

  Data stored in S3 can be uploaded to a new or existing DynamoDB. Supported file formats CSV, DynamoDB JSON and
  Amazon ION.
@@ -30,7 +30,8 @@ if TYPE_CHECKING:


  class S3ToSqlOperator(BaseOperator):
- """Load Data from S3 into a SQL Database.
+ """
+ Load Data from S3 into a SQL Database.

  You need to provide a parser function that takes a filename as an input
  and returns an iterable of rows
@@ -179,7 +179,9 @@ class TaskDoneTrigger(BaseTrigger):
  cluster=self.cluster, tasks=[self.task_arn], WaiterConfig={"MaxAttempts": 1}
  )
  # we reach this point only if the waiter met a success criteria
- yield TriggerEvent({"status": "success", "task_arn": self.task_arn})
+ yield TriggerEvent(
+ {"status": "success", "task_arn": self.task_arn, "cluster": self.cluster}
+ )
  return
  except WaiterError as error:
  if "terminal failure" in str(error):