apache-airflow-providers-amazon 9.9.0rc1__py3-none-any.whl → 9.10.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/amazon/__init__.py +1 -1
- airflow/providers/amazon/aws/auth_manager/avp/facade.py +8 -1
- airflow/providers/amazon/aws/auth_manager/aws_auth_manager.py +0 -55
- airflow/providers/amazon/aws/bundles/__init__.py +16 -0
- airflow/providers/amazon/aws/bundles/s3.py +152 -0
- airflow/providers/amazon/aws/executors/batch/batch_executor.py +51 -0
- airflow/providers/amazon/aws/executors/ecs/utils.py +2 -2
- airflow/providers/amazon/aws/executors/utils/exponential_backoff_retry.py +1 -1
- airflow/providers/amazon/aws/fs/s3.py +2 -1
- airflow/providers/amazon/aws/hooks/athena_sql.py +12 -2
- airflow/providers/amazon/aws/hooks/base_aws.py +29 -17
- airflow/providers/amazon/aws/hooks/batch_client.py +2 -1
- airflow/providers/amazon/aws/hooks/batch_waiters.py +2 -1
- airflow/providers/amazon/aws/hooks/chime.py +5 -1
- airflow/providers/amazon/aws/hooks/ec2.py +2 -1
- airflow/providers/amazon/aws/hooks/eks.py +1 -2
- airflow/providers/amazon/aws/hooks/glue.py +82 -7
- airflow/providers/amazon/aws/hooks/rds.py +2 -1
- airflow/providers/amazon/aws/hooks/s3.py +86 -3
- airflow/providers/amazon/aws/hooks/sagemaker.py +2 -2
- airflow/providers/amazon/aws/hooks/sagemaker_unified_studio.py +1 -1
- airflow/providers/amazon/aws/links/base_aws.py +2 -10
- airflow/providers/amazon/aws/operators/base_aws.py +1 -1
- airflow/providers/amazon/aws/operators/batch.py +6 -22
- airflow/providers/amazon/aws/operators/ecs.py +1 -1
- airflow/providers/amazon/aws/operators/glue.py +23 -8
- airflow/providers/amazon/aws/operators/redshift_data.py +1 -1
- airflow/providers/amazon/aws/operators/sagemaker.py +2 -2
- airflow/providers/amazon/aws/operators/sagemaker_unified_studio.py +1 -1
- airflow/providers/amazon/aws/sensors/base_aws.py +1 -1
- airflow/providers/amazon/aws/sensors/glue.py +57 -12
- airflow/providers/amazon/aws/sensors/s3.py +2 -2
- airflow/providers/amazon/aws/sensors/sagemaker_unified_studio.py +1 -1
- airflow/providers/amazon/aws/transfers/azure_blob_to_s3.py +1 -1
- airflow/providers/amazon/aws/transfers/base.py +1 -1
- airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py +2 -2
- airflow/providers/amazon/aws/transfers/exasol_to_s3.py +1 -1
- airflow/providers/amazon/aws/transfers/ftp_to_s3.py +1 -1
- airflow/providers/amazon/aws/transfers/gcs_to_s3.py +1 -1
- airflow/providers/amazon/aws/transfers/glacier_to_gcs.py +1 -1
- airflow/providers/amazon/aws/transfers/google_api_to_s3.py +6 -2
- airflow/providers/amazon/aws/transfers/hive_to_dynamodb.py +3 -3
- airflow/providers/amazon/aws/transfers/http_to_s3.py +1 -1
- airflow/providers/amazon/aws/transfers/imap_attachment_to_s3.py +1 -1
- airflow/providers/amazon/aws/transfers/local_to_s3.py +1 -1
- airflow/providers/amazon/aws/transfers/mongo_to_s3.py +1 -1
- airflow/providers/amazon/aws/transfers/redshift_to_s3.py +1 -1
- airflow/providers/amazon/aws/transfers/s3_to_dynamodb.py +1 -1
- airflow/providers/amazon/aws/transfers/s3_to_ftp.py +1 -1
- airflow/providers/amazon/aws/transfers/s3_to_redshift.py +1 -1
- airflow/providers/amazon/aws/transfers/s3_to_sftp.py +1 -1
- airflow/providers/amazon/aws/transfers/s3_to_sql.py +3 -4
- airflow/providers/amazon/aws/transfers/salesforce_to_s3.py +1 -1
- airflow/providers/amazon/aws/transfers/sftp_to_s3.py +1 -1
- airflow/providers/amazon/aws/transfers/sql_to_s3.py +2 -5
- airflow/providers/amazon/aws/triggers/base.py +0 -1
- airflow/providers/amazon/aws/triggers/glue.py +37 -24
- airflow/providers/amazon/aws/utils/connection_wrapper.py +10 -1
- airflow/providers/amazon/aws/utils/suppress.py +2 -1
- airflow/providers/amazon/aws/utils/waiter.py +1 -1
- airflow/providers/amazon/aws/waiters/glue.json +55 -0
- airflow/providers/amazon/version_compat.py +24 -0
- {apache_airflow_providers_amazon-9.9.0rc1.dist-info → apache_airflow_providers_amazon-9.10.0rc1.dist-info}/METADATA +8 -9
- {apache_airflow_providers_amazon-9.9.0rc1.dist-info → apache_airflow_providers_amazon-9.10.0rc1.dist-info}/RECORD +66 -64
- {apache_airflow_providers_amazon-9.9.0rc1.dist-info → apache_airflow_providers_amazon-9.10.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_amazon-9.9.0rc1.dist-info → apache_airflow_providers_amazon-9.10.0rc1.dist-info}/entry_points.txt +0 -0
@@ -18,7 +18,6 @@
|
|
18
18
|
from __future__ import annotations
|
19
19
|
|
20
20
|
from collections.abc import Sequence
|
21
|
-
from functools import cached_property
|
22
21
|
from typing import TYPE_CHECKING, Any
|
23
22
|
|
24
23
|
from airflow.configuration import conf
|
@@ -28,16 +27,16 @@ from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
|
|
28
27
|
from airflow.providers.amazon.aws.triggers.glue import (
|
29
28
|
GlueDataQualityRuleRecommendationRunCompleteTrigger,
|
30
29
|
GlueDataQualityRuleSetEvaluationRunCompleteTrigger,
|
30
|
+
GlueJobCompleteTrigger,
|
31
31
|
)
|
32
32
|
from airflow.providers.amazon.aws.utils import validate_execute_complete_event
|
33
33
|
from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
|
34
|
-
from airflow.sensors.base import BaseSensorOperator
|
35
34
|
|
36
35
|
if TYPE_CHECKING:
|
37
36
|
from airflow.utils.context import Context
|
38
37
|
|
39
38
|
|
40
|
-
class GlueJobSensor(
|
39
|
+
class GlueJobSensor(AwsBaseSensor[GlueJobHook]):
|
41
40
|
"""
|
42
41
|
Waits for an AWS Glue Job to reach any of the status below.
|
43
42
|
|
@@ -50,9 +49,29 @@ class GlueJobSensor(BaseSensorOperator):
|
|
50
49
|
:param job_name: The AWS Glue Job unique name
|
51
50
|
:param run_id: The AWS Glue current running job identifier
|
52
51
|
:param verbose: If True, more Glue Job Run logs show in the Airflow Task Logs. (default: False)
|
52
|
+
:param deferrable: If True, the sensor will operate in deferrable mode. This mode requires aiobotocore
|
53
|
+
module to be installed.
|
54
|
+
(default: False, but can be overridden in config file by setting default_deferrable to True)
|
55
|
+
:param poke_interval: Polling period in seconds to check for the status of the job. (default: 120)
|
56
|
+
:param max_retries: Number of times before returning the current state. (default: 60)
|
57
|
+
|
58
|
+
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
59
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
60
|
+
running Airflow in a distributed manner and aws_conn_id is None or
|
61
|
+
empty, then default boto3 configuration would be used (and must be
|
62
|
+
maintained on each worker node).
|
63
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
64
|
+
:param verify: Whether to verify SSL certificates. See:
|
65
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
66
|
+
:param botocore_config: Configuration dictionary (key-values) for botocore client. See:
|
67
|
+
https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
|
53
68
|
"""
|
54
69
|
|
55
|
-
|
70
|
+
SUCCESS_STATES = ("SUCCEEDED",)
|
71
|
+
FAILURE_STATES = ("FAILED", "STOPPED", "TIMEOUT")
|
72
|
+
|
73
|
+
aws_hook_class = GlueJobHook
|
74
|
+
template_fields: Sequence[str] = aws_template_fields("job_name", "run_id")
|
56
75
|
|
57
76
|
def __init__(
|
58
77
|
self,
|
@@ -60,6 +79,9 @@ class GlueJobSensor(BaseSensorOperator):
|
|
60
79
|
job_name: str,
|
61
80
|
run_id: str,
|
62
81
|
verbose: bool = False,
|
82
|
+
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
83
|
+
poke_interval: int = 120,
|
84
|
+
max_retries: int = 60,
|
63
85
|
aws_conn_id: str | None = "aws_default",
|
64
86
|
**kwargs,
|
65
87
|
):
|
@@ -67,24 +89,47 @@ class GlueJobSensor(BaseSensorOperator):
|
|
67
89
|
self.job_name = job_name
|
68
90
|
self.run_id = run_id
|
69
91
|
self.verbose = verbose
|
92
|
+
self.deferrable = deferrable
|
93
|
+
self.poke_interval = poke_interval
|
94
|
+
self.max_retries = max_retries
|
70
95
|
self.aws_conn_id = aws_conn_id
|
71
|
-
self.success_states: list[str] = ["SUCCEEDED"]
|
72
|
-
self.errored_states: list[str] = ["FAILED", "STOPPED", "TIMEOUT"]
|
73
96
|
self.next_log_tokens = GlueJobHook.LogContinuationTokens()
|
74
97
|
|
75
|
-
|
76
|
-
|
77
|
-
|
98
|
+
def execute(self, context: Context) -> Any:
|
99
|
+
if self.deferrable:
|
100
|
+
self.defer(
|
101
|
+
trigger=GlueJobCompleteTrigger(
|
102
|
+
job_name=self.job_name,
|
103
|
+
run_id=self.run_id,
|
104
|
+
verbose=self.verbose,
|
105
|
+
aws_conn_id=self.aws_conn_id,
|
106
|
+
waiter_delay=int(self.poke_interval),
|
107
|
+
waiter_max_attempts=self.max_retries,
|
108
|
+
region_name=self.region_name,
|
109
|
+
),
|
110
|
+
method_name="execute_complete",
|
111
|
+
)
|
112
|
+
else:
|
113
|
+
super().execute(context=context)
|
78
114
|
|
79
|
-
def
|
115
|
+
def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
|
116
|
+
validated_event = validate_execute_complete_event(event)
|
117
|
+
|
118
|
+
if validated_event["status"] != "success":
|
119
|
+
message = f"Error: AWS Glue Job: {validated_event}"
|
120
|
+
raise AirflowException(message)
|
121
|
+
|
122
|
+
self.log.info("AWS Glue Job completed.")
|
123
|
+
|
124
|
+
def poke(self, context: Context) -> bool:
|
80
125
|
self.log.info("Poking for job run status :for Glue Job %s and ID %s", self.job_name, self.run_id)
|
81
126
|
job_state = self.hook.get_job_state(job_name=self.job_name, run_id=self.run_id)
|
82
127
|
|
83
128
|
try:
|
84
|
-
if job_state in self.
|
129
|
+
if job_state in self.SUCCESS_STATES:
|
85
130
|
self.log.info("Exiting Job %s Run State: %s", self.run_id, job_state)
|
86
131
|
return True
|
87
|
-
if job_state in self.
|
132
|
+
if job_state in self.FAILURE_STATES:
|
88
133
|
job_error_message = "Exiting Job %s Run State: %s", self.run_id, job_state
|
89
134
|
self.log.info(job_error_message)
|
90
135
|
raise AirflowException(job_error_message)
|
@@ -21,9 +21,9 @@ import fnmatch
|
|
21
21
|
import inspect
|
22
22
|
import os
|
23
23
|
import re
|
24
|
-
from collections.abc import Sequence
|
24
|
+
from collections.abc import Callable, Sequence
|
25
25
|
from datetime import datetime, timedelta
|
26
|
-
from typing import TYPE_CHECKING, Any,
|
26
|
+
from typing import TYPE_CHECKING, Any, cast
|
27
27
|
|
28
28
|
from airflow.configuration import conf
|
29
29
|
from airflow.providers.amazon.aws.utils import validate_execute_complete_event
|
@@ -25,7 +25,7 @@ from airflow.exceptions import AirflowException
|
|
25
25
|
from airflow.providers.amazon.aws.hooks.sagemaker_unified_studio import (
|
26
26
|
SageMakerNotebookHook,
|
27
27
|
)
|
28
|
-
from airflow.
|
28
|
+
from airflow.providers.amazon.version_compat import BaseSensorOperator
|
29
29
|
|
30
30
|
if TYPE_CHECKING:
|
31
31
|
from airflow.utils.context import Context
|
@@ -22,8 +22,8 @@ import tempfile
|
|
22
22
|
from collections.abc import Sequence
|
23
23
|
from typing import TYPE_CHECKING
|
24
24
|
|
25
|
-
from airflow.models import BaseOperator
|
26
25
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
26
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
27
27
|
|
28
28
|
try:
|
29
29
|
from airflow.providers.microsoft.azure.hooks.wasb import WasbHook
|
@@ -21,8 +21,8 @@ from __future__ import annotations
|
|
21
21
|
|
22
22
|
from collections.abc import Sequence
|
23
23
|
|
24
|
-
from airflow.models import BaseOperator
|
25
24
|
from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
|
25
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
26
26
|
from airflow.utils.types import NOTSET, ArgNotSet
|
27
27
|
|
28
28
|
|
@@ -20,13 +20,13 @@ from __future__ import annotations
|
|
20
20
|
|
21
21
|
import json
|
22
22
|
import os
|
23
|
-
from collections.abc import Sequence
|
23
|
+
from collections.abc import Callable, Sequence
|
24
24
|
from copy import copy
|
25
25
|
from datetime import datetime
|
26
26
|
from decimal import Decimal
|
27
27
|
from functools import cached_property
|
28
28
|
from tempfile import NamedTemporaryFile
|
29
|
-
from typing import IO, TYPE_CHECKING, Any
|
29
|
+
from typing import IO, TYPE_CHECKING, Any
|
30
30
|
from uuid import uuid4
|
31
31
|
|
32
32
|
from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
|
@@ -23,8 +23,8 @@ from collections.abc import Sequence
|
|
23
23
|
from tempfile import NamedTemporaryFile
|
24
24
|
from typing import TYPE_CHECKING
|
25
25
|
|
26
|
-
from airflow.models import BaseOperator
|
27
26
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
27
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
28
28
|
from airflow.providers.exasol.hooks.exasol import ExasolHook
|
29
29
|
|
30
30
|
if TYPE_CHECKING:
|
@@ -21,8 +21,8 @@ from collections.abc import Sequence
|
|
21
21
|
from tempfile import NamedTemporaryFile
|
22
22
|
from typing import TYPE_CHECKING
|
23
23
|
|
24
|
-
from airflow.models import BaseOperator
|
25
24
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
25
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
26
26
|
from airflow.providers.ftp.hooks.ftp import FTPHook
|
27
27
|
|
28
28
|
if TYPE_CHECKING:
|
@@ -26,8 +26,8 @@ from typing import TYPE_CHECKING
|
|
26
26
|
from packaging.version import Version
|
27
27
|
|
28
28
|
from airflow.exceptions import AirflowException
|
29
|
-
from airflow.models import BaseOperator
|
30
29
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
30
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
31
31
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook
|
32
32
|
|
33
33
|
if TYPE_CHECKING:
|
@@ -21,8 +21,8 @@ import tempfile
|
|
21
21
|
from collections.abc import Sequence
|
22
22
|
from typing import TYPE_CHECKING
|
23
23
|
|
24
|
-
from airflow.models import BaseOperator
|
25
24
|
from airflow.providers.amazon.aws.hooks.glacier import GlacierHook
|
25
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
26
26
|
from airflow.providers.google.cloud.hooks.gcs import GCSHook
|
27
27
|
|
28
28
|
if TYPE_CHECKING:
|
@@ -24,9 +24,9 @@ import sys
|
|
24
24
|
from collections.abc import Sequence
|
25
25
|
from typing import TYPE_CHECKING
|
26
26
|
|
27
|
-
from airflow.models import
|
28
|
-
from airflow.models.xcom import MAX_XCOM_SIZE, XCOM_RETURN_KEY
|
27
|
+
from airflow.models.xcom import XCOM_RETURN_KEY
|
29
28
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
29
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
30
30
|
from airflow.providers.google.common.hooks.discovery_api import GoogleDiscoveryApiHook
|
31
31
|
|
32
32
|
if TYPE_CHECKING:
|
@@ -36,6 +36,10 @@ if TYPE_CHECKING:
|
|
36
36
|
from airflow.models import TaskInstance as RuntimeTaskInstanceProtocol # type: ignore[assignment]
|
37
37
|
from airflow.utils.context import Context
|
38
38
|
|
39
|
+
# MAX XCOM Size is 48KB
|
40
|
+
# https://github.com/apache/airflow/pull/1618#discussion_r68249677
|
41
|
+
MAX_XCOM_SIZE = 49344
|
42
|
+
|
39
43
|
|
40
44
|
class GoogleApiToS3Operator(BaseOperator):
|
41
45
|
"""
|
@@ -20,11 +20,11 @@
|
|
20
20
|
from __future__ import annotations
|
21
21
|
|
22
22
|
import json
|
23
|
-
from collections.abc import Sequence
|
24
|
-
from typing import TYPE_CHECKING
|
23
|
+
from collections.abc import Callable, Sequence
|
24
|
+
from typing import TYPE_CHECKING
|
25
25
|
|
26
|
-
from airflow.models import BaseOperator
|
27
26
|
from airflow.providers.amazon.aws.hooks.dynamodb import DynamoDBHook
|
27
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
28
28
|
from airflow.providers.apache.hive.hooks.hive import HiveServer2Hook
|
29
29
|
|
30
30
|
if TYPE_CHECKING:
|
@@ -22,8 +22,8 @@ from __future__ import annotations
|
|
22
22
|
from functools import cached_property
|
23
23
|
from typing import TYPE_CHECKING, Any
|
24
24
|
|
25
|
-
from airflow.models import BaseOperator
|
26
25
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
26
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
27
27
|
from airflow.providers.http.hooks.http import HttpHook
|
28
28
|
|
29
29
|
if TYPE_CHECKING:
|
@@ -22,8 +22,8 @@ from __future__ import annotations
|
|
22
22
|
from collections.abc import Sequence
|
23
23
|
from typing import TYPE_CHECKING
|
24
24
|
|
25
|
-
from airflow.models import BaseOperator
|
26
25
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
26
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
27
27
|
from airflow.providers.imap.hooks.imap import ImapHook
|
28
28
|
|
29
29
|
if TYPE_CHECKING:
|
@@ -20,8 +20,8 @@ from __future__ import annotations
|
|
20
20
|
from collections.abc import Sequence
|
21
21
|
from typing import TYPE_CHECKING
|
22
22
|
|
23
|
-
from airflow.models import BaseOperator
|
24
23
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
24
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
25
25
|
|
26
26
|
if TYPE_CHECKING:
|
27
27
|
from airflow.utils.context import Context
|
@@ -23,8 +23,8 @@ from typing import TYPE_CHECKING, Any, cast
|
|
23
23
|
|
24
24
|
from bson import json_util
|
25
25
|
|
26
|
-
from airflow.models import BaseOperator
|
27
26
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
27
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
28
28
|
from airflow.providers.mongo.hooks.mongo import MongoHook
|
29
29
|
|
30
30
|
if TYPE_CHECKING:
|
@@ -24,11 +24,11 @@ from collections.abc import Iterable, Mapping, Sequence
|
|
24
24
|
from typing import TYPE_CHECKING
|
25
25
|
|
26
26
|
from airflow.exceptions import AirflowException
|
27
|
-
from airflow.models import BaseOperator
|
28
27
|
from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook
|
29
28
|
from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook
|
30
29
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
31
30
|
from airflow.providers.amazon.aws.utils.redshift import build_credentials_block
|
31
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
32
32
|
from airflow.utils.types import NOTSET, ArgNotSet
|
33
33
|
|
34
34
|
if TYPE_CHECKING:
|
@@ -23,8 +23,8 @@ from typing import TYPE_CHECKING, Any, Literal, TypedDict
|
|
23
23
|
from botocore.exceptions import ClientError, WaiterError
|
24
24
|
|
25
25
|
from airflow.exceptions import AirflowException
|
26
|
-
from airflow.models import BaseOperator
|
27
26
|
from airflow.providers.amazon.aws.hooks.dynamodb import DynamoDBHook
|
27
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
28
28
|
|
29
29
|
if TYPE_CHECKING:
|
30
30
|
from airflow.utils.context import Context
|
@@ -21,8 +21,8 @@ from collections.abc import Sequence
|
|
21
21
|
from tempfile import NamedTemporaryFile
|
22
22
|
from typing import TYPE_CHECKING
|
23
23
|
|
24
|
-
from airflow.models import BaseOperator
|
25
24
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
25
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
26
26
|
from airflow.providers.ftp.hooks.ftp import FTPHook
|
27
27
|
|
28
28
|
if TYPE_CHECKING:
|
@@ -20,11 +20,11 @@ from collections.abc import Iterable, Sequence
|
|
20
20
|
from typing import TYPE_CHECKING
|
21
21
|
|
22
22
|
from airflow.exceptions import AirflowException
|
23
|
-
from airflow.models import BaseOperator
|
24
23
|
from airflow.providers.amazon.aws.hooks.redshift_data import RedshiftDataHook
|
25
24
|
from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook
|
26
25
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
27
26
|
from airflow.providers.amazon.aws.utils.redshift import build_credentials_block
|
27
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
28
28
|
from airflow.utils.types import NOTSET, ArgNotSet
|
29
29
|
|
30
30
|
if TYPE_CHECKING:
|
@@ -22,8 +22,8 @@ from tempfile import NamedTemporaryFile
|
|
22
22
|
from typing import TYPE_CHECKING
|
23
23
|
from urllib.parse import urlsplit
|
24
24
|
|
25
|
-
from airflow.models import BaseOperator
|
26
25
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
26
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
27
27
|
from airflow.providers.ssh.hooks.ssh import SSHHook
|
28
28
|
|
29
29
|
if TYPE_CHECKING:
|
@@ -16,15 +16,14 @@
|
|
16
16
|
# under the License.
|
17
17
|
from __future__ import annotations
|
18
18
|
|
19
|
-
from collections.abc import Iterable, Sequence
|
19
|
+
from collections.abc import Callable, Iterable, Sequence
|
20
20
|
from functools import cached_property
|
21
21
|
from tempfile import NamedTemporaryFile
|
22
|
-
from typing import TYPE_CHECKING
|
22
|
+
from typing import TYPE_CHECKING
|
23
23
|
|
24
24
|
from airflow.exceptions import AirflowException
|
25
|
-
from airflow.hooks.base import BaseHook
|
26
|
-
from airflow.models import BaseOperator
|
27
25
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
26
|
+
from airflow.providers.amazon.version_compat import BaseHook, BaseOperator
|
28
27
|
|
29
28
|
if TYPE_CHECKING:
|
30
29
|
from airflow.utils.context import Context
|
@@ -21,8 +21,8 @@ import tempfile
|
|
21
21
|
from collections.abc import Sequence
|
22
22
|
from typing import TYPE_CHECKING
|
23
23
|
|
24
|
-
from airflow.models import BaseOperator
|
25
24
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
25
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
26
26
|
from airflow.providers.salesforce.hooks.salesforce import SalesforceHook
|
27
27
|
|
28
28
|
if TYPE_CHECKING:
|
@@ -22,8 +22,8 @@ from tempfile import NamedTemporaryFile
|
|
22
22
|
from typing import TYPE_CHECKING
|
23
23
|
from urllib.parse import urlsplit
|
24
24
|
|
25
|
-
from airflow.models import BaseOperator
|
26
25
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
26
|
+
from airflow.providers.amazon.version_compat import BaseOperator
|
27
27
|
from airflow.providers.ssh.hooks.ssh import SSHHook
|
28
28
|
|
29
29
|
if TYPE_CHECKING:
|
@@ -22,14 +22,11 @@ import gzip
|
|
22
22
|
import io
|
23
23
|
from collections import namedtuple
|
24
24
|
from collections.abc import Iterable, Mapping, Sequence
|
25
|
-
from typing import TYPE_CHECKING, Any, cast
|
26
|
-
|
27
|
-
from typing_extensions import Literal
|
25
|
+
from typing import TYPE_CHECKING, Any, Literal, cast
|
28
26
|
|
29
27
|
from airflow.exceptions import AirflowException
|
30
|
-
from airflow.hooks.base import BaseHook
|
31
|
-
from airflow.models import BaseOperator
|
32
28
|
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
|
29
|
+
from airflow.providers.amazon.version_compat import BaseHook, BaseOperator
|
33
30
|
|
34
31
|
if TYPE_CHECKING:
|
35
32
|
import pandas as pd
|
@@ -88,7 +88,6 @@ class AwsBaseWaiterTrigger(BaseTrigger):
|
|
88
88
|
super().__init__()
|
89
89
|
# parameters that should be hardcoded in the child's implem
|
90
90
|
self.serialized_fields = serialized_fields
|
91
|
-
|
92
91
|
self.waiter_name = waiter_name
|
93
92
|
self.waiter_args = waiter_args
|
94
93
|
self.failure_message = failure_message
|
@@ -31,49 +31,62 @@ from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
|
|
31
31
|
from airflow.triggers.base import BaseTrigger, TriggerEvent
|
32
32
|
|
33
33
|
|
34
|
-
class GlueJobCompleteTrigger(
|
34
|
+
class GlueJobCompleteTrigger(AwsBaseWaiterTrigger):
|
35
35
|
"""
|
36
36
|
Watches for a glue job, triggers when it finishes.
|
37
37
|
|
38
38
|
:param job_name: glue job name
|
39
39
|
:param run_id: the ID of the specific run to watch for that job
|
40
40
|
:param verbose: whether to print the job's logs in airflow logs or not
|
41
|
-
:param
|
41
|
+
:param waiter_delay: The amount of time in seconds to wait between attempts. (default: 60)
|
42
|
+
:param waiter_max_attempts: The maximum number of attempts to be made. (default: 75)
|
43
|
+
:param aws_conn_id: The Airflow connection used for AWS credentials
|
44
|
+
:param region_name: Optional aws region name (example: us-east-1). Uses region from connection
|
45
|
+
if not specified.
|
46
|
+
:param verify: Whether or not to verify SSL certificates.
|
47
|
+
:param botocore_config: Configuration dictionary (key-values) for botocore client.
|
42
48
|
"""
|
43
49
|
|
44
50
|
def __init__(
|
45
51
|
self,
|
46
52
|
job_name: str,
|
47
53
|
run_id: str,
|
48
|
-
verbose: bool,
|
49
|
-
|
50
|
-
|
54
|
+
verbose: bool = False,
|
55
|
+
waiter_delay: int = 60,
|
56
|
+
waiter_max_attempts: int = 75,
|
57
|
+
aws_conn_id: str | None = "aws_default",
|
58
|
+
region_name: str | None = None,
|
59
|
+
verify: bool | str | None = None,
|
60
|
+
botocore_config: dict | None = None,
|
51
61
|
):
|
52
|
-
super().__init__(
|
62
|
+
super().__init__(
|
63
|
+
serialized_fields={"job_name": job_name, "run_id": run_id, "verbose": verbose},
|
64
|
+
waiter_name="job_complete",
|
65
|
+
waiter_args={"JobName": job_name, "RunId": run_id},
|
66
|
+
failure_message="AWS Glue job failed.",
|
67
|
+
status_message="Status of AWS Glue job is",
|
68
|
+
status_queries=["JobRun.JobRunState"],
|
69
|
+
return_key="run_id",
|
70
|
+
return_value=run_id,
|
71
|
+
waiter_delay=waiter_delay,
|
72
|
+
waiter_max_attempts=waiter_max_attempts,
|
73
|
+
aws_conn_id=aws_conn_id,
|
74
|
+
region_name=region_name,
|
75
|
+
verify=verify,
|
76
|
+
botocore_config=botocore_config,
|
77
|
+
)
|
53
78
|
self.job_name = job_name
|
54
79
|
self.run_id = run_id
|
55
80
|
self.verbose = verbose
|
56
|
-
self.aws_conn_id = aws_conn_id
|
57
|
-
self.job_poll_interval = job_poll_interval
|
58
81
|
|
59
|
-
def
|
60
|
-
return (
|
61
|
-
|
62
|
-
self.
|
63
|
-
|
64
|
-
|
65
|
-
"run_id": self.run_id,
|
66
|
-
"verbose": self.verbose,
|
67
|
-
"aws_conn_id": self.aws_conn_id,
|
68
|
-
"job_poll_interval": self.job_poll_interval,
|
69
|
-
},
|
82
|
+
def hook(self) -> AwsGenericHook:
|
83
|
+
return GlueJobHook(
|
84
|
+
aws_conn_id=self.aws_conn_id,
|
85
|
+
region_name=self.region_name,
|
86
|
+
verify=self.verify,
|
87
|
+
config=self.botocore_config,
|
70
88
|
)
|
71
89
|
|
72
|
-
async def run(self) -> AsyncIterator[TriggerEvent]:
|
73
|
-
hook = GlueJobHook(aws_conn_id=self.aws_conn_id, job_poll_interval=self.job_poll_interval)
|
74
|
-
await hook.async_job_completion(self.job_name, self.run_id, self.verbose)
|
75
|
-
yield TriggerEvent({"status": "success", "message": "Job done", "value": self.run_id})
|
76
|
-
|
77
90
|
|
78
91
|
class GlueCatalogPartitionTrigger(BaseTrigger):
|
79
92
|
"""
|
@@ -32,7 +32,10 @@ from airflow.utils.log.logging_mixin import LoggingMixin
|
|
32
32
|
from airflow.utils.types import NOTSET, ArgNotSet
|
33
33
|
|
34
34
|
if TYPE_CHECKING:
|
35
|
-
|
35
|
+
try:
|
36
|
+
from airflow.sdk import Connection
|
37
|
+
except ImportError:
|
38
|
+
from airflow.models.connection import Connection # type: ignore[assignment]
|
36
39
|
|
37
40
|
|
38
41
|
@dataclass
|
@@ -245,6 +248,12 @@ class AwsConnectionWrapper(LoggingMixin):
|
|
245
248
|
config_kwargs["signature_version"] = UNSIGNED
|
246
249
|
self.botocore_config = Config(**config_kwargs)
|
247
250
|
|
251
|
+
if "endpoint_url" not in extra:
|
252
|
+
self.log.debug(
|
253
|
+
"Missing endpoint_url in extra config of AWS Connection with id %s. Using default AWS service endpoint",
|
254
|
+
conn.conn_id,
|
255
|
+
)
|
256
|
+
|
248
257
|
self.endpoint_url = extra.get("endpoint_url")
|
249
258
|
|
250
259
|
# Retrieve Assume Role Configuration
|
@@ -28,8 +28,9 @@ Module for suppress errors in Amazon Provider.
|
|
28
28
|
from __future__ import annotations
|
29
29
|
|
30
30
|
import logging
|
31
|
+
from collections.abc import Callable
|
31
32
|
from functools import wraps
|
32
|
-
from typing import
|
33
|
+
from typing import TypeVar
|
33
34
|
|
34
35
|
from airflow.typing_compat import ParamSpec
|
35
36
|
|
@@ -1,6 +1,61 @@
|
|
1
1
|
{
|
2
2
|
"version": 2,
|
3
3
|
"waiters": {
|
4
|
+
"job_complete": {
|
5
|
+
"operation": "GetJobRun",
|
6
|
+
"delay": 60,
|
7
|
+
"maxAttempts": 75,
|
8
|
+
"acceptors": [
|
9
|
+
{
|
10
|
+
"matcher": "path",
|
11
|
+
"argument": "JobRun.JobRunState",
|
12
|
+
"expected": "STARTING",
|
13
|
+
"state": "retry"
|
14
|
+
},
|
15
|
+
{
|
16
|
+
"matcher": "path",
|
17
|
+
"argument": "JobRun.JobRunState",
|
18
|
+
"expected": "RUNNING",
|
19
|
+
"state": "retry"
|
20
|
+
},
|
21
|
+
{
|
22
|
+
"matcher": "path",
|
23
|
+
"argument": "JobRun.JobRunState",
|
24
|
+
"expected": "STOPPING",
|
25
|
+
"state": "retry"
|
26
|
+
},
|
27
|
+
{
|
28
|
+
"matcher": "path",
|
29
|
+
"argument": "JobRun.JobRunState",
|
30
|
+
"expected": "STOPPED",
|
31
|
+
"state": "failure"
|
32
|
+
},
|
33
|
+
{
|
34
|
+
"matcher": "path",
|
35
|
+
"argument": "JobRun.JobRunState",
|
36
|
+
"expected": "FAILED",
|
37
|
+
"state": "failure"
|
38
|
+
},
|
39
|
+
{
|
40
|
+
"matcher": "path",
|
41
|
+
"argument": "JobRun.JobRunState",
|
42
|
+
"expected": "ERROR",
|
43
|
+
"state": "failure"
|
44
|
+
},
|
45
|
+
{
|
46
|
+
"matcher": "path",
|
47
|
+
"argument": "JobRun.JobRunState",
|
48
|
+
"expected": "TIMEOUT",
|
49
|
+
"state": "failure"
|
50
|
+
},
|
51
|
+
{
|
52
|
+
"matcher": "path",
|
53
|
+
"argument": "JobRun.JobRunState",
|
54
|
+
"expected": "SUCCEEDED",
|
55
|
+
"state": "success"
|
56
|
+
}
|
57
|
+
]
|
58
|
+
},
|
4
59
|
"crawler_ready": {
|
5
60
|
"operation": "GetCrawler",
|
6
61
|
"delay": 5,
|