apache-airflow-providers-amazon 9.9.0rc1__py3-none-any.whl → 9.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. airflow/providers/amazon/__init__.py +1 -1
  2. airflow/providers/amazon/aws/auth_manager/avp/facade.py +8 -1
  3. airflow/providers/amazon/aws/auth_manager/aws_auth_manager.py +0 -55
  4. airflow/providers/amazon/aws/bundles/__init__.py +16 -0
  5. airflow/providers/amazon/aws/bundles/s3.py +152 -0
  6. airflow/providers/amazon/aws/executors/batch/batch_executor.py +51 -0
  7. airflow/providers/amazon/aws/executors/ecs/utils.py +2 -2
  8. airflow/providers/amazon/aws/executors/utils/exponential_backoff_retry.py +1 -1
  9. airflow/providers/amazon/aws/fs/s3.py +2 -1
  10. airflow/providers/amazon/aws/hooks/athena_sql.py +12 -2
  11. airflow/providers/amazon/aws/hooks/base_aws.py +29 -17
  12. airflow/providers/amazon/aws/hooks/batch_client.py +2 -1
  13. airflow/providers/amazon/aws/hooks/batch_waiters.py +2 -1
  14. airflow/providers/amazon/aws/hooks/chime.py +5 -1
  15. airflow/providers/amazon/aws/hooks/ec2.py +2 -1
  16. airflow/providers/amazon/aws/hooks/eks.py +1 -2
  17. airflow/providers/amazon/aws/hooks/glue.py +82 -7
  18. airflow/providers/amazon/aws/hooks/rds.py +2 -1
  19. airflow/providers/amazon/aws/hooks/s3.py +86 -3
  20. airflow/providers/amazon/aws/hooks/sagemaker.py +2 -2
  21. airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py +1 -1
  22. airflow/providers/amazon/aws/links/base_aws.py +2 -10
  23. airflow/providers/amazon/aws/operators/base_aws.py +1 -1
  24. airflow/providers/amazon/aws/operators/batch.py +6 -22
  25. airflow/providers/amazon/aws/operators/ecs.py +1 -1
  26. airflow/providers/amazon/aws/operators/glue.py +23 -8
  27. airflow/providers/amazon/aws/operators/redshift_data.py +1 -1
  28. airflow/providers/amazon/aws/operators/sagemaker.py +2 -2
  29. airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py +1 -1
  30. airflow/providers/amazon/aws/sensors/base_aws.py +1 -1
  31. airflow/providers/amazon/aws/sensors/glue.py +57 -12
  32. airflow/providers/amazon/aws/sensors/s3.py +2 -2
  33. airflow/providers/amazon/aws/sensors/sagemaker_unified_studio.py +1 -1
  34. airflow/providers/amazon/aws/transfers/azure_blob_to_s3.py +1 -1
  35. airflow/providers/amazon/aws/transfers/base.py +1 -1
  36. airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py +2 -2
  37. airflow/providers/amazon/aws/transfers/exasol_to_s3.py +1 -1
  38. airflow/providers/amazon/aws/transfers/ftp_to_s3.py +1 -1
  39. airflow/providers/amazon/aws/transfers/gcs_to_s3.py +1 -1
  40. airflow/providers/amazon/aws/transfers/glacier_to_gcs.py +1 -1
  41. airflow/providers/amazon/aws/transfers/google_api_to_s3.py +6 -2
  42. airflow/providers/amazon/aws/transfers/hive_to_dynamodb.py +3 -3
  43. airflow/providers/amazon/aws/transfers/http_to_s3.py +1 -1
  44. airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py +1 -1
  45. airflow/providers/amazon/aws/transfers/local_to_s3.py +1 -1
  46. airflow/providers/amazon/aws/transfers/mongo_to_s3.py +1 -1
  47. airflow/providers/amazon/aws/transfers/redshift_to_s3.py +1 -1
  48. airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py +1 -1
  49. airflow/providers/amazon/aws/transfers/s3_to_ftp.py +1 -1
  50. airflow/providers/amazon/aws/transfers/s3_to_redshift.py +1 -1
  51. airflow/providers/amazon/aws/transfers/s3_to_sftp.py +1 -1
  52. airflow/providers/amazon/aws/transfers/s3_to_sql.py +3 -4
  53. airflow/providers/amazon/aws/transfers/salesforce_to_s3.py +1 -1
  54. airflow/providers/amazon/aws/transfers/sftp_to_s3.py +1 -1
  55. airflow/providers/amazon/aws/transfers/sql_to_s3.py +2 -5
  56. airflow/providers/amazon/aws/triggers/base.py +0 -1
  57. airflow/providers/amazon/aws/triggers/glue.py +37 -24
  58. airflow/providers/amazon/aws/utils/connection_wrapper.py +10 -1
  59. airflow/providers/amazon/aws/utils/suppress.py +2 -1
  60. airflow/providers/amazon/aws/utils/waiter.py +1 -1
  61. airflow/providers/amazon/aws/waiters/glue.json +55 -0
  62. airflow/providers/amazon/version_compat.py +24 -0
  63. {apache_airflow_providers_amazon-9.9.0rc1.dist-info → apache_airflow_providers_amazon-9.10.0rc1.dist-info}/METADATA +8 -9
  64. {apache_airflow_providers_amazon-9.9.0rc1.dist-info → apache_airflow_providers_amazon-9.10.0rc1.dist-info}/RECORD +66 -64
  65. {apache_airflow_providers_amazon-9.9.0rc1.dist-info → apache_airflow_providers_amazon-9.10.0rc1.dist-info}/WHEEL +0 -0
  66. {apache_airflow_providers_amazon-9.9.0rc1.dist-info → apache_airflow_providers_amazon-9.10.0rc1.dist-info}/entry_points.txt +0 -0
airflow/providers/amazon/aws/sensors/glue.py
@@ -18,7 +18,6 @@
 from __future__ import annotations
 
 from collections.abc import Sequence
-from functools import cached_property
 from typing import TYPE_CHECKING, Any
 
 from airflow.configuration import conf
@@ -28,16 +27,16 @@ from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
 from airflow.providers.amazon.aws.triggers.glue import (
     GlueDataQualityRuleRecommendationRunCompleteTrigger,
     GlueDataQualityRuleSetEvaluationRunCompleteTrigger,
+    GlueJobCompleteTrigger,
 )
 from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
-from airflow.sensors.base import BaseSensorOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
 
 
-class GlueJobSensor(BaseSensorOperator):
+class GlueJobSensor(AwsBaseSensor[GlueJobHook]):
     """
     Waits for an AWS Glue Job to reach any of the status below.
 
@@ -50,9 +49,29 @@ class GlueJobSensor(BaseSensorOperator):
     :param job_name: The AWS Glue Job unique name
     :param run_id: The AWS Glue current running job identifier
     :param verbose: If True, more Glue Job Run logs show in the Airflow Task Logs. (default: False)
+    :param deferrable: If True, the sensor will operate in deferrable mode. This mode requires aiobotocore
+        module to be installed.
+        (default: False, but can be overridden in config file by setting default_deferrable to True)
+    :param poke_interval: Polling period in seconds to check for the status of the job. (default: 120)
+    :param max_retries: Number of times before returning the current state. (default: 60)
+
+    :param aws_conn_id: The Airflow connection used for AWS credentials.
+        If this is ``None`` or empty then the default boto3 behaviour is used. If
+        running Airflow in a distributed manner and aws_conn_id is None or
+        empty, then default boto3 configuration would be used (and must be
+        maintained on each worker node).
+    :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
+    :param verify: Whether to verify SSL certificates. See:
+        https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
+    :param botocore_config: Configuration dictionary (key-values) for botocore client. See:
+        https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
     """
 
-    template_fields: Sequence[str] = ("job_name", "run_id")
+    SUCCESS_STATES = ("SUCCEEDED",)
+    FAILURE_STATES = ("FAILED", "STOPPED", "TIMEOUT")
+
+    aws_hook_class = GlueJobHook
+    template_fields: Sequence[str] = aws_template_fields("job_name", "run_id")
 
     def __init__(
         self,
@@ -60,6 +79,9 @@ class GlueJobSensor(BaseSensorOperator):
         job_name: str,
         run_id: str,
         verbose: bool = False,
+        deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        poke_interval: int = 120,
+        max_retries: int = 60,
         aws_conn_id: str | None = "aws_default",
         **kwargs,
     ):
@@ -67,24 +89,47 @@ class GlueJobSensor(BaseSensorOperator):
         self.job_name = job_name
         self.run_id = run_id
         self.verbose = verbose
+        self.deferrable = deferrable
+        self.poke_interval = poke_interval
+        self.max_retries = max_retries
         self.aws_conn_id = aws_conn_id
-        self.success_states: list[str] = ["SUCCEEDED"]
-        self.errored_states: list[str] = ["FAILED", "STOPPED", "TIMEOUT"]
         self.next_log_tokens = GlueJobHook.LogContinuationTokens()
 
-    @cached_property
-    def hook(self):
-        return GlueJobHook(aws_conn_id=self.aws_conn_id)
+    def execute(self, context: Context) -> Any:
+        if self.deferrable:
+            self.defer(
+                trigger=GlueJobCompleteTrigger(
+                    job_name=self.job_name,
+                    run_id=self.run_id,
+                    verbose=self.verbose,
+                    aws_conn_id=self.aws_conn_id,
+                    waiter_delay=int(self.poke_interval),
+                    waiter_max_attempts=self.max_retries,
+                    region_name=self.region_name,
+                ),
+                method_name="execute_complete",
+            )
+        else:
+            super().execute(context=context)
 
-    def poke(self, context: Context):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        validated_event = validate_execute_complete_event(event)
+
+        if validated_event["status"] != "success":
+            message = f"Error: AWS Glue Job: {validated_event}"
+            raise AirflowException(message)
+
+        self.log.info("AWS Glue Job completed.")
+
+    def poke(self, context: Context) -> bool:
         self.log.info("Poking for job run status :for Glue Job %s and ID %s", self.job_name, self.run_id)
         job_state = self.hook.get_job_state(job_name=self.job_name, run_id=self.run_id)
 
         try:
-            if job_state in self.success_states:
+            if job_state in self.SUCCESS_STATES:
                 self.log.info("Exiting Job %s Run State: %s", self.run_id, job_state)
                 return True
-            if job_state in self.errored_states:
+            if job_state in self.FAILURE_STATES:
                 job_error_message = "Exiting Job %s Run State: %s", self.run_id, job_state
                 self.log.info(job_error_message)
                 raise AirflowException(job_error_message)
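
With these changes the sensor can defer instead of occupying a worker slot while the Glue job runs. A minimal usage sketch against the new signature; the DAG id, job name, and the upstream task supplying the run id are hypothetical:

```python
from datetime import datetime

from airflow import DAG
from airflow.providers.amazon.aws.sensors.glue import GlueJobSensor

with DAG("example_glue_wait", start_date=datetime(2025, 1, 1), schedule=None):
    # With deferrable=True the sensor hands off to GlueJobCompleteTrigger;
    # poke_interval/max_retries map onto waiter_delay/waiter_max_attempts.
    wait_for_job = GlueJobSensor(
        task_id="wait_for_glue_job",
        job_name="my_glue_job",  # hypothetical Glue job name
        run_id="{{ ti.xcom_pull(task_ids='submit_glue_job') }}",  # hypothetical upstream task
        deferrable=True,
        poke_interval=120,
        max_retries=60,
    )
```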
airflow/providers/amazon/aws/sensors/s3.py
@@ -21,9 +21,9 @@ import fnmatch
 import inspect
 import os
 import re
-from collections.abc import Sequence
+from collections.abc import Callable, Sequence
 from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, Any, Callable, cast
+from typing import TYPE_CHECKING, Any, cast
 
 from airflow.configuration import conf
 from airflow.providers.amazon.aws.utils import validate_execute_complete_event
airflow/providers/amazon/aws/sensors/sagemaker_unified_studio.py
@@ -25,7 +25,7 @@ from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.sagemaker_unified_studio import (
     SageMakerNotebookHook,
 )
-from airflow.sensors.base import BaseSensorOperator
+from airflow.providers.amazon.version_compat import BaseSensorOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/amazon/aws/transfers/azure_blob_to_s3.py
@@ -22,8 +22,8 @@ import tempfile
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 
 try:
     from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
airflow/providers/amazon/aws/transfers/base.py
@@ -21,8 +21,8 @@ from __future__ import annotations
 
 from collections.abc import Sequence
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.utils.types import NOTSET, ArgNotSet
 
 
airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py
@@ -20,13 +20,13 @@
 
 import json
 import os
-from collections.abc import Sequence
+from collections.abc import Callable, Sequence
 from copy import copy
 from datetime import datetime
 from decimal import Decimal
 from functools import cached_property
 from tempfile import NamedTemporaryFile
-from typing import IO, TYPE_CHECKING, Any, Callable
+from typing import IO, TYPE_CHECKING, Any
 from uuid import uuid4
 
 from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
airflow/providers/amazon/aws/transfers/exasol_to_s3.py
@@ -23,8 +23,8 @@ from collections.abc import Sequence
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.exasol.hooks.exasol import ExasolHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/ftp_to_s3.py
@@ -21,8 +21,8 @@ from collections.abc import Sequence
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.ftp.hooks.ftp import FTPHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/gcs_to_s3.py
@@ -26,8 +26,8 @@ from typing import TYPE_CHECKING
 from packaging.version import Version
 
 from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/glacier_to_gcs.py
@@ -21,8 +21,8 @@ import tempfile
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.glacier import GlacierHook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.google.cloud.hooks.gcs import GCSHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/google_api_to_s3.py
@@ -24,9 +24,9 @@ import sys
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from airflow.models import BaseOperator
-from airflow.models.xcom import MAX_XCOM_SIZE, XCOM_RETURN_KEY
+from airflow.models.xcom import XCOM_RETURN_KEY
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.google.common.hooks.discovery_api import GoogleDiscoveryApiHook
 
 if TYPE_CHECKING:
@@ -36,6 +36,10 @@ if TYPE_CHECKING:
     from airflow.models import TaskInstance as RuntimeTaskInstanceProtocol  # type: ignore[assignment]
     from airflow.utils.context import Context
 
+# MAX XCOM Size is 48KB
+# https://github.com/apache/airflow/pull/1618#discussion_r68249677
+MAX_XCOM_SIZE = 49344
+
 
 class GoogleApiToS3Operator(BaseOperator):
     """
airflow/providers/amazon/aws/transfers/hive_to_dynamodb.py
@@ -20,11 +20,11 @@
 from __future__ import annotations
 
 import json
-from collections.abc import Sequence
-from typing import TYPE_CHECKING, Callable
+from collections.abc import Callable, Sequence
+from typing import TYPE_CHECKING
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.dynamodb import DynamoDBHook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.apache.hive.hooks.hive import HiveServer2Hook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/http_to_s3.py
@@ -22,8 +22,8 @@ from __future__ import annotations
 from functools import cached_property
 from typing import TYPE_CHECKING, Any
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.http.hooks.http import HttpHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py
@@ -22,8 +22,8 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.imap.hooks.imap import ImapHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/local_to_s3.py
@@ -20,8 +20,8 @@ from __future__ import annotations
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/amazon/aws/transfers/mongo_to_s3.py
@@ -23,8 +23,8 @@ from typing import TYPE_CHECKING, Any, cast
 
 from bson import json_util
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.mongo.hooks.mongo import MongoHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/redshift_to_s3.py
@@ -24,11 +24,11 @@ from collections.abc import Iterable, Mapping, Sequence
 from typing import TYPE_CHECKING
 
 from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook
 from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
 from airflow.providers.amazon.aws.utils.redshift import build_credentials_block
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.utils.types import NOTSET, ArgNotSet
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py
@@ -23,8 +23,8 @@ from typing import TYPE_CHECKING, Any, Literal, TypedDict
 from botocore.exceptions import ClientError, WaiterError
 
 from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.dynamodb import DynamoDBHook
+from airflow.providers.amazon.version_compat import BaseOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/amazon/aws/transfers/s3_to_ftp.py
@@ -21,8 +21,8 @@ from collections.abc import Sequence
 from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.ftp.hooks.ftp import FTPHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/s3_to_redshift.py
@@ -20,11 +20,11 @@ from collections.abc import Iterable, Sequence
 from typing import TYPE_CHECKING
 
 from airflow.exceptions import AirflowException
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook
 from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
 from airflow.providers.amazon.aws.utils.redshift import build_credentials_block
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.utils.types import NOTSET, ArgNotSet
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/s3_to_sftp.py
@@ -22,8 +22,8 @@ from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING
 from urllib.parse import urlsplit
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.ssh.hooks.ssh import SSHHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/s3_to_sql.py
@@ -16,15 +16,14 @@
 # under the License.
 from __future__ import annotations
 
-from collections.abc import Iterable, Sequence
+from collections.abc import Callable, Iterable, Sequence
 from functools import cached_property
 from tempfile import NamedTemporaryFile
-from typing import TYPE_CHECKING, Callable
+from typing import TYPE_CHECKING
 
 from airflow.exceptions import AirflowException
-from airflow.hooks.base import BaseHook
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseHook, BaseOperator
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
airflow/providers/amazon/aws/transfers/salesforce_to_s3.py
@@ -21,8 +21,8 @@ import tempfile
 from collections.abc import Sequence
 from typing import TYPE_CHECKING
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.salesforce.hooks.salesforce import SalesforceHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/sftp_to_s3.py
@@ -22,8 +22,8 @@ from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING
 from urllib.parse import urlsplit
 
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseOperator
 from airflow.providers.ssh.hooks.ssh import SSHHook
 
 if TYPE_CHECKING:
airflow/providers/amazon/aws/transfers/sql_to_s3.py
@@ -22,14 +22,11 @@ import gzip
 import io
 from collections import namedtuple
 from collections.abc import Iterable, Mapping, Sequence
-from typing import TYPE_CHECKING, Any, cast
-
-from typing_extensions import Literal
+from typing import TYPE_CHECKING, Any, Literal, cast
 
 from airflow.exceptions import AirflowException
-from airflow.hooks.base import BaseHook
-from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
+from airflow.providers.amazon.version_compat import BaseHook, BaseOperator
 
 if TYPE_CHECKING:
     import pandas as pd
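
All of the import swaps above route BaseOperator, BaseHook, and BaseSensorOperator through the provider's new version_compat module (file 62 in the list, +24 lines), whose body is not shown in this section. A plausible sketch of such a shim, assuming it simply branches on the installed Airflow version; the exact contents are not confirmed by this diff:

```python
# Hypothetical sketch of version_compat.py: re-export task-SDK classes on
# Airflow 3.x and fall back to the legacy import paths on Airflow 2.x.
from __future__ import annotations


def get_base_airflow_version_tuple() -> tuple[int, int]:
    from packaging.version import Version

    from airflow import __version__

    v = Version(__version__)
    return (v.major, v.minor)


AIRFLOW_V_3_0_PLUS = get_base_airflow_version_tuple() >= (3, 0)

if AIRFLOW_V_3_0_PLUS:
    from airflow.sdk import BaseHook, BaseOperator, BaseSensorOperator
else:
    from airflow.hooks.base import BaseHook
    from airflow.models import BaseOperator
    from airflow.sensors.base import BaseSensorOperator

__all__ = ["BaseHook", "BaseOperator", "BaseSensorOperator"]
```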
airflow/providers/amazon/aws/triggers/base.py
@@ -88,7 +88,6 @@ class AwsBaseWaiterTrigger(BaseTrigger):
         super().__init__()
         # parameters that should be hardcoded in the child's implem
         self.serialized_fields = serialized_fields
-
         self.waiter_name = waiter_name
         self.waiter_args = waiter_args
         self.failure_message = failure_message
airflow/providers/amazon/aws/triggers/glue.py
@@ -31,49 +31,62 @@ from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
 from airflow.triggers.base import BaseTrigger, TriggerEvent
 
 
-class GlueJobCompleteTrigger(BaseTrigger):
+class GlueJobCompleteTrigger(AwsBaseWaiterTrigger):
     """
     Watches for a glue job, triggers when it finishes.
 
     :param job_name: glue job name
     :param run_id: the ID of the specific run to watch for that job
     :param verbose: whether to print the job's logs in airflow logs or not
-    :param aws_conn_id: The Airflow connection used for AWS credentials.
+    :param waiter_delay: The amount of time in seconds to wait between attempts. (default: 60)
+    :param waiter_max_attempts: The maximum number of attempts to be made. (default: 75)
+    :param aws_conn_id: The Airflow connection used for AWS credentials
+    :param region_name: Optional aws region name (example: us-east-1). Uses region from connection
+        if not specified.
+    :param verify: Whether or not to verify SSL certificates.
+    :param botocore_config: Configuration dictionary (key-values) for botocore client.
     """
 
     def __init__(
         self,
         job_name: str,
         run_id: str,
-        verbose: bool,
-        aws_conn_id: str | None,
-        job_poll_interval: int | float,
+        verbose: bool = False,
+        waiter_delay: int = 60,
+        waiter_max_attempts: int = 75,
+        aws_conn_id: str | None = "aws_default",
+        region_name: str | None = None,
+        verify: bool | str | None = None,
+        botocore_config: dict | None = None,
     ):
-        super().__init__()
+        super().__init__(
+            serialized_fields={"job_name": job_name, "run_id": run_id, "verbose": verbose},
+            waiter_name="job_complete",
+            waiter_args={"JobName": job_name, "RunId": run_id},
+            failure_message="AWS Glue job failed.",
+            status_message="Status of AWS Glue job is",
+            status_queries=["JobRun.JobRunState"],
+            return_key="run_id",
+            return_value=run_id,
+            waiter_delay=waiter_delay,
+            waiter_max_attempts=waiter_max_attempts,
+            aws_conn_id=aws_conn_id,
+            region_name=region_name,
+            verify=verify,
+            botocore_config=botocore_config,
+        )
         self.job_name = job_name
         self.run_id = run_id
         self.verbose = verbose
-        self.aws_conn_id = aws_conn_id
-        self.job_poll_interval = job_poll_interval
 
-    def serialize(self) -> tuple[str, dict[str, Any]]:
-        return (
-            # dynamically generate the fully qualified name of the class
-            self.__class__.__module__ + "." + self.__class__.__qualname__,
-            {
-                "job_name": self.job_name,
-                "run_id": self.run_id,
-                "verbose": self.verbose,
-                "aws_conn_id": self.aws_conn_id,
-                "job_poll_interval": self.job_poll_interval,
-            },
+    def hook(self) -> AwsGenericHook:
+        return GlueJobHook(
+            aws_conn_id=self.aws_conn_id,
+            region_name=self.region_name,
+            verify=self.verify,
+            config=self.botocore_config,
         )
 
-    async def run(self) -> AsyncIterator[TriggerEvent]:
-        hook = GlueJobHook(aws_conn_id=self.aws_conn_id, job_poll_interval=self.job_poll_interval)
-        await hook.async_job_completion(self.job_name, self.run_id, self.verbose)
-        yield TriggerEvent({"status": "success", "message": "Job done", "value": self.run_id})
-
 
 class GlueCatalogPartitionTrigger(BaseTrigger):
     """
airflow/providers/amazon/aws/utils/connection_wrapper.py
@@ -32,7 +32,10 @@ from airflow.utils.log.logging_mixin import LoggingMixin
 from airflow.utils.types import NOTSET, ArgNotSet
 
 if TYPE_CHECKING:
-    from airflow.models.connection import Connection  # Avoid circular imports.
+    try:
+        from airflow.sdk import Connection
+    except ImportError:
+        from airflow.models.connection import Connection  # type: ignore[assignment]
 
 
 @dataclass
@@ -245,6 +248,12 @@ class AwsConnectionWrapper(LoggingMixin):
                 config_kwargs["signature_version"] = UNSIGNED
             self.botocore_config = Config(**config_kwargs)
 
+        if "endpoint_url" not in extra:
+            self.log.debug(
+                "Missing endpoint_url in extra config of AWS Connection with id %s. Using default AWS service endpoint",
+                conn.conn_id,
+            )
+
         self.endpoint_url = extra.get("endpoint_url")
 
         # Retrieve Assume Role Configuration
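
For reference, the debug message above fires only when a connection's extra field omits endpoint_url; supplying one (for example, to point at LocalStack) looks like this. The connection id and URL are hypothetical:

```python
import json

from airflow.models.connection import Connection

conn = Connection(
    conn_id="aws_localstack",  # hypothetical connection id
    conn_type="aws",
    extra=json.dumps({"region_name": "us-east-1", "endpoint_url": "http://localhost:4566"}),
)
print(conn.extra_dejson["endpoint_url"])  # -> http://localhost:4566
```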
airflow/providers/amazon/aws/utils/suppress.py
@@ -28,8 +28,9 @@ Module for suppress errors in Amazon Provider.
 from __future__ import annotations
 
 import logging
+from collections.abc import Callable
 from functools import wraps
-from typing import Callable, TypeVar
+from typing import TypeVar
 
 from airflow.typing_compat import ParamSpec
 
airflow/providers/amazon/aws/utils/waiter.py
@@ -19,8 +19,8 @@ from __future__ import annotations
 
 import logging
 import time
+from collections.abc import Callable
 from enum import Enum
-from typing import Callable
 
 from airflow.exceptions import AirflowException
 
airflow/providers/amazon/aws/waiters/glue.json
@@ -1,6 +1,61 @@
 {
     "version": 2,
     "waiters": {
+        "job_complete": {
+            "operation": "GetJobRun",
+            "delay": 60,
+            "maxAttempts": 75,
+            "acceptors": [
+                {
+                    "matcher": "path",
+                    "argument": "JobRun.JobRunState",
+                    "expected": "STARTING",
+                    "state": "retry"
+                },
+                {
+                    "matcher": "path",
+                    "argument": "JobRun.JobRunState",
+                    "expected": "RUNNING",
+                    "state": "retry"
+                },
+                {
+                    "matcher": "path",
+                    "argument": "JobRun.JobRunState",
+                    "expected": "STOPPING",
+                    "state": "retry"
+                },
+                {
+                    "matcher": "path",
+                    "argument": "JobRun.JobRunState",
+                    "expected": "STOPPED",
+                    "state": "failure"
+                },
+                {
+                    "matcher": "path",
+                    "argument": "JobRun.JobRunState",
+                    "expected": "FAILED",
+                    "state": "failure"
+                },
+                {
+                    "matcher": "path",
+                    "argument": "JobRun.JobRunState",
+                    "expected": "ERROR",
+                    "state": "failure"
+                },
+                {
+                    "matcher": "path",
+                    "argument": "JobRun.JobRunState",
+                    "expected": "TIMEOUT",
+                    "state": "failure"
+                },
+                {
+                    "matcher": "path",
+                    "argument": "JobRun.JobRunState",
+                    "expected": "SUCCEEDED",
+                    "state": "success"
+                }
+            ]
+        },
         "crawler_ready": {
             "operation": "GetCrawler",
             "delay": 5,