apache-airflow-providers-amazon 8.17.0rc2__py3-none-any.whl → 8.18.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/amazon/__init__.py +1 -1
- airflow/providers/amazon/aws/auth_manager/cli/avp_commands.py +3 -3
- airflow/providers/amazon/aws/auth_manager/cli/definition.py +14 -0
- airflow/providers/amazon/aws/auth_manager/cli/idc_commands.py +148 -0
- airflow/providers/amazon/aws/auth_manager/views/auth.py +1 -1
- airflow/providers/amazon/aws/executors/ecs/Dockerfile +3 -3
- airflow/providers/amazon/aws/executors/ecs/boto_schema.py +1 -1
- airflow/providers/amazon/aws/executors/ecs/ecs_executor.py +40 -17
- airflow/providers/amazon/aws/executors/ecs/utils.py +9 -7
- airflow/providers/amazon/aws/executors/utils/exponential_backoff_retry.py +23 -4
- airflow/providers/amazon/aws/hooks/athena.py +15 -2
- airflow/providers/amazon/aws/hooks/base_aws.py +16 -14
- airflow/providers/amazon/aws/hooks/emr.py +6 -0
- airflow/providers/amazon/aws/hooks/logs.py +85 -1
- airflow/providers/amazon/aws/hooks/neptune.py +85 -0
- airflow/providers/amazon/aws/hooks/quicksight.py +9 -8
- airflow/providers/amazon/aws/hooks/redshift_cluster.py +8 -7
- airflow/providers/amazon/aws/hooks/redshift_sql.py +3 -3
- airflow/providers/amazon/aws/hooks/s3.py +4 -6
- airflow/providers/amazon/aws/hooks/sagemaker.py +136 -9
- airflow/providers/amazon/aws/links/emr.py +122 -2
- airflow/providers/amazon/aws/log/cloudwatch_task_handler.py +2 -2
- airflow/providers/amazon/aws/operators/athena.py +4 -1
- airflow/providers/amazon/aws/operators/batch.py +5 -6
- airflow/providers/amazon/aws/operators/ecs.py +6 -2
- airflow/providers/amazon/aws/operators/eks.py +31 -26
- airflow/providers/amazon/aws/operators/emr.py +192 -26
- airflow/providers/amazon/aws/operators/glue.py +5 -2
- airflow/providers/amazon/aws/operators/glue_crawler.py +5 -2
- airflow/providers/amazon/aws/operators/glue_databrew.py +5 -2
- airflow/providers/amazon/aws/operators/lambda_function.py +3 -0
- airflow/providers/amazon/aws/operators/neptune.py +218 -0
- airflow/providers/amazon/aws/operators/rds.py +21 -12
- airflow/providers/amazon/aws/operators/redshift_cluster.py +12 -18
- airflow/providers/amazon/aws/operators/redshift_data.py +2 -4
- airflow/providers/amazon/aws/operators/sagemaker.py +94 -31
- airflow/providers/amazon/aws/operators/step_function.py +4 -1
- airflow/providers/amazon/aws/sensors/batch.py +2 -2
- airflow/providers/amazon/aws/sensors/ec2.py +4 -2
- airflow/providers/amazon/aws/sensors/emr.py +13 -6
- airflow/providers/amazon/aws/sensors/glue_catalog_partition.py +4 -1
- airflow/providers/amazon/aws/sensors/quicksight.py +17 -14
- airflow/providers/amazon/aws/sensors/redshift_cluster.py +2 -4
- airflow/providers/amazon/aws/sensors/s3.py +3 -0
- airflow/providers/amazon/aws/sensors/sqs.py +4 -1
- airflow/providers/amazon/aws/transfers/s3_to_redshift.py +1 -0
- airflow/providers/amazon/aws/transfers/sql_to_s3.py +31 -3
- airflow/providers/amazon/aws/triggers/neptune.py +115 -0
- airflow/providers/amazon/aws/triggers/rds.py +9 -7
- airflow/providers/amazon/aws/triggers/redshift_cluster.py +2 -2
- airflow/providers/amazon/aws/triggers/redshift_data.py +1 -1
- airflow/providers/amazon/aws/triggers/sagemaker.py +82 -1
- airflow/providers/amazon/aws/utils/__init__.py +10 -0
- airflow/providers/amazon/aws/utils/connection_wrapper.py +12 -8
- airflow/providers/amazon/aws/utils/mixins.py +5 -1
- airflow/providers/amazon/aws/utils/task_log_fetcher.py +2 -2
- airflow/providers/amazon/aws/waiters/neptune.json +85 -0
- airflow/providers/amazon/get_provider_info.py +26 -2
- {apache_airflow_providers_amazon-8.17.0rc2.dist-info → apache_airflow_providers_amazon-8.18.0rc2.dist-info}/METADATA +6 -6
- {apache_airflow_providers_amazon-8.17.0rc2.dist-info → apache_airflow_providers_amazon-8.18.0rc2.dist-info}/RECORD +62 -57
- {apache_airflow_providers_amazon-8.17.0rc2.dist-info → apache_airflow_providers_amazon-8.18.0rc2.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_amazon-8.17.0rc2.dist-info → apache_airflow_providers_amazon-8.18.0rc2.dist-info}/entry_points.txt +0 -0
airflow/providers/amazon/aws/operators/batch.py

@@ -44,7 +44,7 @@ from airflow.providers.amazon.aws.triggers.batch import (
     BatchCreateComputeEnvironmentTrigger,
     BatchJobTrigger,
 )
-from airflow.providers.amazon.aws.utils import trim_none_values
+from airflow.providers.amazon.aws.utils import trim_none_values, validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.task_log_fetcher import AwsTaskLogFetcher
 
 if TYPE_CHECKING:
@@ -269,10 +269,7 @@ class BatchOperator(BaseOperator):
         return self.job_id
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
-        if event is None:
-            err_msg = "Trigger error: event is None"
-            self.log.info(err_msg)
-            raise AirflowException(err_msg)
+        event = validate_execute_complete_event(event)
 
         if event["status"] != "success":
             raise AirflowException(f"Error while running job: {event}")
@@ -541,7 +538,9 @@ class BatchCreateComputeEnvironmentOperator(BaseOperator):
         self.log.info("AWS Batch compute environment created successfully")
         return arn
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error while waiting for the compute environment to be ready: {event}")
         return event["value"]
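The new validate_execute_complete_event helper imported here lives in airflow/providers/amazon/aws/utils/__init__.py (the +10 lines in the file list above). Judging from the inline None-checks it replaces in these operators, it presumably behaves roughly like the sketch below; this is an inference from the diff, not the package's verbatim source.

from typing import Any

from airflow.exceptions import AirflowException


def validate_execute_complete_event(event: dict[str, Any] | None = None) -> dict[str, Any]:
    # The per-operator code removed above logged and raised exactly this message
    # whenever the trigger resumed the task without an event payload.
    if event is None:
        raise AirflowException("Trigger error: event is None")
    return event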

airflow/providers/amazon/aws/operators/ecs.py

@@ -21,7 +21,7 @@ import re
 import warnings
 from datetime import timedelta
 from functools import cached_property
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, Any, Sequence
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -35,6 +35,7 @@ from airflow.providers.amazon.aws.triggers.ecs import (
     ClusterInactiveTrigger,
     TaskDoneTrigger,
 )
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.identifiers import generate_uuid
 from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
 from airflow.providers.amazon.aws.utils.task_log_fetcher import AwsTaskLogFetcher
@@ -580,7 +581,9 @@ class EcsRunTaskOperator(EcsBaseOperator):
         else:
             return None
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str | None:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error in task execution: {event}")
         self.arn = event["task_arn"]  # restore arn to its updated value, needed for next steps
@@ -596,6 +599,7 @@ class EcsRunTaskOperator(EcsBaseOperator):
         )
         if len(one_log["events"]) > 0:
             return one_log["events"][0]["message"]
+        return None
 
     def _after_execution(self):
         self._check_success_task()

airflow/providers/amazon/aws/operators/eks.py

@@ -25,6 +25,7 @@ from functools import cached_property
 from typing import TYPE_CHECKING, Any, List, Sequence, cast
 
 from botocore.exceptions import ClientError, WaiterError
+from deprecated import deprecated
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -38,6 +39,7 @@ from airflow.providers.amazon.aws.triggers.eks import (
     EksDeleteFargateProfileTrigger,
     EksDeleteNodegroupTrigger,
 )
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.waiter_with_logging import wait
 from airflow.providers.cncf.kubernetes.utils.pod_manager import OnFinishAction
 
@@ -263,13 +265,14 @@ class EksCreateClusterOperator(BaseOperator):
         return EksHook(aws_conn_id=self.aws_conn_id, region_name=self.region)
 
     @property
-    def eks_hook(self):
-        warnings.warn(
+    @deprecated(
+        reason=(
             "`eks_hook` property is deprecated and will be removed in the future. "
-            "Please use `hook` property instead.",
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
+            "Please use `hook` property instead."
+        ),
+        category=AirflowProviderDeprecationWarning,
+    )
+    def eks_hook(self):
         return self.hook
 
     def execute(self, context: Context):
@@ -419,11 +422,10 @@ class EksCreateClusterOperator(BaseOperator):
             raise AirflowException("Error creating cluster")
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
         resource = "fargate profile" if self.compute == "fargate" else self.compute
-        if event is None:
-            self.log.info("Trigger error: event is None")
-            raise AirflowException("Trigger error: event is None")
-        elif event["status"] != "success":
+        if event["status"] != "success":
             raise AirflowException(f"Error creating {resource}: {event}")
 
         self.log.info("%s created successfully", resource)
@@ -545,10 +547,11 @@ class EksCreateNodegroupOperator(BaseOperator):
                 timeout=timedelta(seconds=self.waiter_max_attempts * self.waiter_delay + 60),
             )
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error creating nodegroup: {event}")
-        return
 
 
 class EksCreateFargateProfileOperator(BaseOperator):
@@ -654,12 +657,13 @@ class EksCreateFargateProfileOperator(BaseOperator):
                 timeout=timedelta(seconds=(self.waiter_max_attempts * self.waiter_delay + 60)),
             )
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error creating Fargate profile: {event}")
-        else:
-            self.log.info("Fargate profile created successfully")
-        return
+
+        self.log.info("Fargate profile created successfully")
 
 
 class EksDeleteClusterOperator(BaseOperator):
@@ -786,10 +790,9 @@ class EksDeleteClusterOperator(BaseOperator):
         self.log.info(SUCCESS_MSG.format(compute=FARGATE_FULL_NAME))
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
-        if event is None:
-            self.log.info("Trigger error: event is None")
-            raise AirflowException("Trigger error: event is None")
-        elif event["status"] == "success":
+        event = validate_execute_complete_event(event)
+
+        if event["status"] == "success":
             self.log.info("Cluster deleted successfully.")
 
 
@@ -877,10 +880,11 @@ class EksDeleteNodegroupOperator(BaseOperator):
             clusterName=self.cluster_name, nodegroupName=self.nodegroup_name
         )
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error deleting nodegroup: {event}")
-        return
 
 
 class EksDeleteFargateProfileOperator(BaseOperator):
@@ -970,12 +974,13 @@ class EksDeleteFargateProfileOperator(BaseOperator):
             WaiterConfig={"Delay": self.waiter_delay, "MaxAttempts": self.waiter_max_attempts},
         )
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error deleting Fargate profile: {event}")
-        else:
-            self.log.info("Fargate profile deleted successfully")
-        return
+
+        self.log.info("Fargate profile deleted successfully")
 
 
 class EksPodOperator(KubernetesPodOperator):
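The eks.py change above swaps a hand-written warnings.warn(...) body for the deprecated decorator from the PyPI Deprecated package, stacked under @property. A minimal standalone sketch of that decorator pattern, with a stand-in warning class instead of AirflowProviderDeprecationWarning:

import warnings

from deprecated import deprecated


class LegacyWarning(DeprecationWarning):
    """Stand-in for AirflowProviderDeprecationWarning in this sketch."""


class Thing:
    @property
    def hook(self):
        return "the real hook"

    @property
    @deprecated(reason="`old_hook` is deprecated. Please use `hook` instead.", category=LegacyWarning)
    def old_hook(self):
        # The deprecated getter just delegates, mirroring eks_hook -> hook above.
        return self.hook


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    value = Thing().old_hook  # emits LegacyWarning, still returns the delegated value

print(value, [str(w.message) for w in caught])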

airflow/providers/amazon/aws/operators/emr.py

@@ -27,8 +27,17 @@ from uuid import uuid4
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator
+from airflow.models.mappedoperator import MappedOperator
 from airflow.providers.amazon.aws.hooks.emr import EmrContainerHook, EmrHook, EmrServerlessHook
-from airflow.providers.amazon.aws.links.emr import EmrClusterLink, EmrLogsLink, get_log_uri
+from airflow.providers.amazon.aws.links.emr import (
+    EmrClusterLink,
+    EmrLogsLink,
+    EmrServerlessCloudWatchLogsLink,
+    EmrServerlessDashboardLink,
+    EmrServerlessLogsLink,
+    EmrServerlessS3LogsLink,
+    get_log_uri,
+)
 from airflow.providers.amazon.aws.triggers.emr import (
     EmrAddStepsTrigger,
     EmrContainerTrigger,
@@ -41,6 +50,7 @@ from airflow.providers.amazon.aws.triggers.emr import (
     EmrServerlessStopApplicationTrigger,
     EmrTerminateJobFlowTrigger,
 )
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.waiter import waiter
 from airflow.providers.amazon.aws.utils.waiter_with_logging import wait
 from airflow.utils.helpers import exactly_one, prune_dict
@@ -180,11 +190,13 @@ class EmrAddStepsOperator(BaseOperator):
 
         return step_ids
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error while running steps: {event}")
-        else:
-            self.log.info("Steps completed successfully")
+
+        self.log.info("Steps completed successfully")
         return event["value"]
 
 
@@ -494,6 +506,8 @@ class EmrContainerOperator(BaseOperator):
     :param max_tries: Deprecated - use max_polling_attempts instead.
     :param max_polling_attempts: Maximum number of times to wait for the job run to finish.
         Defaults to None, which will poll until the job is *not* in a pending, submitted, or running state.
+    :param job_retry_max_attempts: Maximum number of times to retry when the EMR job fails.
+        Defaults to None, which disable the retry.
     :param tags: The tags assigned to job runs.
         Defaults to None
     :param deferrable: Run operator in the deferrable mode.
@@ -525,6 +539,7 @@ class EmrContainerOperator(BaseOperator):
         max_tries: int | None = None,
         tags: dict | None = None,
         max_polling_attempts: int | None = None,
+        job_retry_max_attempts: int | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs: Any,
     ) -> None:
@@ -540,6 +555,7 @@ class EmrContainerOperator(BaseOperator):
         self.wait_for_completion = wait_for_completion
         self.poll_interval = poll_interval
         self.max_polling_attempts = max_polling_attempts
+        self.job_retry_max_attempts = job_retry_max_attempts
         self.tags = tags
         self.job_id: str | None = None
         self.deferrable = deferrable
@@ -574,6 +590,7 @@ class EmrContainerOperator(BaseOperator):
             self.configuration_overrides,
             self.client_request_token,
             self.tags,
+            self.job_retry_max_attempts,
         )
         if self.deferrable:
             query_status = self.hook.check_query_status(job_id=self.job_id)
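A hedged usage sketch of the new job_retry_max_attempts parameter threaded through the EmrContainerOperator hunks above (constructor argument, instance attribute, then the final argument of what is presumably the hook's submit-job call); the task id, virtual cluster id, role ARN and job driver values below are placeholders, not taken from the diff.

from airflow.providers.amazon.aws.operators.emr import EmrContainerOperator

run_spark_job = EmrContainerOperator(
    task_id="run_spark_job_on_eks",
    name="sample-spark-job",
    virtual_cluster_id="abc123virtualcluster",  # placeholder
    execution_role_arn="arn:aws:iam::111122223333:role/emr-eks-job-role",  # placeholder
    release_label="emr-6.15.0-latest",
    job_driver={"sparkSubmitJobDriver": {"entryPoint": "s3://my-bucket/app.py"}},
    # New in 8.18.0: let EMR on EKS retry the job run up to 3 times if it fails;
    # leaving it as None keeps the previous behaviour (no retry).
    job_retry_max_attempts=3,
)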

@@ -619,7 +636,9 @@ class EmrContainerOperator(BaseOperator):
                 f"query_execution_id is {self.job_id}. Error: {error_message}"
             )
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error while running job: {event}")
 
@@ -806,11 +825,13 @@ class EmrCreateJobFlowOperator(BaseOperator):
             )
         return self._job_flow_id
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error creating jobFlow: {event}")
-        else:
-            self.log.info("JobFlow created successfully")
+
+        self.log.info("JobFlow created successfully")
         return event["job_flow_id"]
 
     def on_kill(self) -> None:
@@ -969,12 +990,13 @@ class EmrTerminateJobFlowOperator(BaseOperator):
                 timeout=timedelta(seconds=self.waiter_max_attempts * self.waiter_delay + 60),
             )
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error terminating JobFlow: {event}")
-        else:
-            self.log.info("Jobflow terminated successfully.")
-        return
+
+        self.log.info("Jobflow terminated successfully.")
 
 
 class EmrServerlessCreateApplicationOperator(BaseOperator):
@@ -1135,7 +1157,9 @@ class EmrServerlessCreateApplicationOperator(BaseOperator):
         )
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
-        if event is None or event["status"] != "success":
+        event = validate_execute_complete_event(event)
+
+        if event["status"] != "success":
             raise AirflowException(f"Trigger error: Application failed to start, event is {event}")
 
         self.log.info("Application %s started", event["application_id"])
@@ -1172,6 +1196,9 @@ class EmrServerlessStartJobOperator(BaseOperator):
     :param deferrable: If True, the operator will wait asynchronously for the crawl to complete.
         This implies waiting for completion. This mode requires aiobotocore module to be installed.
         (default: False, but can be overridden in config file by setting default_deferrable to True)
+    :param enable_application_ui_links: If True, the operator will generate one-time links to EMR Serverless
+        application UIs. The generated links will allow any user with access to the DAG to see the Spark or
+        Tez UI or Spark stdout logs. Defaults to False.
     """
 
     template_fields: Sequence[str] = (
@@ -1181,6 +1208,7 @@ class EmrServerlessStartJobOperator(BaseOperator):
         "job_driver",
         "configuration_overrides",
         "name",
+        "aws_conn_id",
     )
 
     template_fields_renderers = {
@@ -1188,12 +1216,48 @@ class EmrServerlessStartJobOperator(BaseOperator):
         "configuration_overrides": "json",
     }
 
+    @property
+    def operator_extra_links(self):
+        """
+        Dynamically add extra links depending on the job type and if they're enabled.
+
+        If S3 or CloudWatch monitoring configurations exist, add links directly to the relevant consoles.
+        Only add dashboard links if they're explicitly enabled. These are one-time links that any user
+        can access, but expire on first click or one hour, whichever comes first.
+        """
+        op_extra_links = []
+
+        if isinstance(self, MappedOperator):
+            enable_application_ui_links = self.partial_kwargs.get(
+                "enable_application_ui_links"
+            ) or self.expand_input.value.get("enable_application_ui_links")
+            job_driver = self.partial_kwargs.get("job_driver") or self.expand_input.value.get("job_driver")
+            configuration_overrides = self.partial_kwargs.get(
+                "configuration_overrides"
+            ) or self.expand_input.value.get("configuration_overrides")
+
+        else:
+            enable_application_ui_links = self.enable_application_ui_links
+            configuration_overrides = self.configuration_overrides
+            job_driver = self.job_driver
+
+        if enable_application_ui_links:
+            op_extra_links.extend([EmrServerlessDashboardLink()])
+            if "sparkSubmit" in job_driver:
+                op_extra_links.extend([EmrServerlessLogsLink()])
+        if self.is_monitoring_in_job_override("s3MonitoringConfiguration", configuration_overrides):
+            op_extra_links.extend([EmrServerlessS3LogsLink()])
+        if self.is_monitoring_in_job_override("cloudWatchLoggingConfiguration", configuration_overrides):
+            op_extra_links.extend([EmrServerlessCloudWatchLogsLink()])
+
+        return tuple(op_extra_links)
+
     def __init__(
         self,
         application_id: str,
         execution_role_arn: str,
         job_driver: dict,
-        configuration_overrides: dict | None,
+        configuration_overrides: dict | None = None,
         client_request_token: str = "",
         config: dict | None = None,
         wait_for_completion: bool = True,
@@ -1204,6 +1268,7 @@ class EmrServerlessStartJobOperator(BaseOperator):
         waiter_max_attempts: int | ArgNotSet = NOTSET,
         waiter_delay: int | ArgNotSet = NOTSET,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        enable_application_ui_links: bool = False,
         **kwargs,
     ):
         if waiter_check_interval_seconds is NOTSET:
@@ -1243,6 +1308,7 @@ class EmrServerlessStartJobOperator(BaseOperator):
         self.waiter_delay = int(waiter_delay)  # type: ignore[arg-type]
         self.job_id: str | None = None
         self.deferrable = deferrable
+        self.enable_application_ui_links = enable_application_ui_links
         super().__init__(**kwargs)
 
         self.client_request_token = client_request_token or str(uuid4())
@@ -1300,6 +1366,9 @@ class EmrServerlessStartJobOperator(BaseOperator):
 
         self.job_id = response["jobRunId"]
         self.log.info("EMR serverless job started: %s", self.job_id)
+
+        self.persist_links(context)
+
         if self.deferrable:
             self.defer(
                 trigger=EmrServerlessStartJobTrigger(
@@ -1312,6 +1381,7 @@ class EmrServerlessStartJobOperator(BaseOperator):
                 method_name="execute_complete",
                 timeout=timedelta(seconds=self.waiter_max_attempts * self.waiter_delay),
             )
+
         if self.wait_for_completion:
             waiter = self.hook.get_waiter("serverless_job_completed")
             wait(
@@ -1327,10 +1397,9 @@ class EmrServerlessStartJobOperator(BaseOperator):
         return self.job_id
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
-        if event is None:
-            self.log.info("Trigger error: event is None")
-            raise AirflowException("Trigger error: event is None")
-        elif event["status"] == "success":
+        event = validate_execute_complete_event(event)
+
+        if event["status"] == "success":
             self.log.info("Serverless job completed")
             return event["job_id"]
 
@@ -1369,6 +1438,105 @@ class EmrServerlessStartJobOperator(BaseOperator):
             check_interval_seconds=self.waiter_delay,
         )
 
+    def is_monitoring_in_job_override(self, config_key: str, job_override: dict | None) -> bool:
+        """
+        Check if monitoring is enabled for the job.
+
+        Note: This is not compatible with application defaults:
+        https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/default-configs.html
+
+        This is used to determine what extra links should be shown.
+        """
+        monitoring_config = (job_override or {}).get("monitoringConfiguration")
+        if monitoring_config is None or config_key not in monitoring_config:
+            return False
+
+        # CloudWatch can have an "enabled" flag set to False
+        if config_key == "cloudWatchLoggingConfiguration":
+            return monitoring_config.get(config_key).get("enabled") is True
+
+        return config_key in monitoring_config
+
+    def persist_links(self, context: Context):
+        """Populate the relevant extra links for the EMR Serverless jobs."""
+        # Persist the EMR Serverless Dashboard link (Spark/Tez UI)
+        if self.enable_application_ui_links:
+            EmrServerlessDashboardLink.persist(
+                context=context,
+                operator=self,
+                region_name=self.hook.conn_region_name,
+                aws_partition=self.hook.conn_partition,
+                conn_id=self.hook.aws_conn_id,
+                application_id=self.application_id,
+                job_run_id=self.job_id,
+            )
+
+        # If this is a Spark job, persist the EMR Serverless logs link (Driver stdout)
+        if self.enable_application_ui_links and "sparkSubmit" in self.job_driver:
+            EmrServerlessLogsLink.persist(
+                context=context,
+                operator=self,
+                region_name=self.hook.conn_region_name,
+                aws_partition=self.hook.conn_partition,
+                conn_id=self.hook.aws_conn_id,
+                application_id=self.application_id,
+                job_run_id=self.job_id,
+            )
+
+        # Add S3 and/or CloudWatch links if either is enabled
+        if self.is_monitoring_in_job_override("s3MonitoringConfiguration", self.configuration_overrides):
+            log_uri = (
+                (self.configuration_overrides or {})
+                .get("monitoringConfiguration", {})
+                .get("s3MonitoringConfiguration", {})
+                .get("logUri")
+            )
+            EmrServerlessS3LogsLink.persist(
+                context=context,
+                operator=self,
+                region_name=self.hook.conn_region_name,
+                aws_partition=self.hook.conn_partition,
+                log_uri=log_uri,
+                application_id=self.application_id,
+                job_run_id=self.job_id,
+            )
+            emrs_s3_url = EmrServerlessS3LogsLink().format_link(
+                aws_domain=EmrServerlessCloudWatchLogsLink.get_aws_domain(self.hook.conn_partition),
+                region_name=self.hook.conn_region_name,
+                aws_partition=self.hook.conn_partition,
+                log_uri=log_uri,
+                application_id=self.application_id,
+                job_run_id=self.job_id,
+            )
+            self.log.info("S3 logs available at: %s", emrs_s3_url)
+
+        if self.is_monitoring_in_job_override("cloudWatchLoggingConfiguration", self.configuration_overrides):
+            cloudwatch_config = (
+                (self.configuration_overrides or {})
+                .get("monitoringConfiguration", {})
+                .get("cloudWatchLoggingConfiguration", {})
+            )
+            log_group_name = cloudwatch_config.get("logGroupName", "/aws/emr-serverless")
+            log_stream_prefix = cloudwatch_config.get("logStreamNamePrefix", "")
+            log_stream_prefix = f"{log_stream_prefix}/applications/{self.application_id}/jobs/{self.job_id}"
+
+            EmrServerlessCloudWatchLogsLink.persist(
+                context=context,
+                operator=self,
+                region_name=self.hook.conn_region_name,
+                aws_partition=self.hook.conn_partition,
+                awslogs_group=log_group_name,
+                stream_prefix=log_stream_prefix,
+            )
+            emrs_cloudwatch_url = EmrServerlessCloudWatchLogsLink().format_link(
+                aws_domain=EmrServerlessCloudWatchLogsLink.get_aws_domain(self.hook.conn_partition),
+                region_name=self.hook.conn_region_name,
+                aws_partition=self.hook.conn_partition,
+                awslogs_group=log_group_name,
+                stream_prefix=log_stream_prefix,
+            )
+            self.log.info("CloudWatch logs available at: %s", emrs_cloudwatch_url)
+
 
 class EmrServerlessStopApplicationOperator(BaseOperator):
     """
@@ -1527,10 +1695,9 @@ class EmrServerlessStopApplicationOperator(BaseOperator):
         )
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
-        if event is None:
-            self.log.info("Trigger error: event is None")
-            raise AirflowException("Trigger error: event is None")
-        elif event["status"] == "success":
+        event = validate_execute_complete_event(event)
+
+        if event["status"] == "success":
            self.log.info("EMR serverless application %s stopped successfully", self.application_id)
 
 
@@ -1656,8 +1823,7 @@ class EmrServerlessDeleteApplicationOperator(EmrServerlessStopApplicationOperator):
         self.log.info("EMR serverless application deleted")
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
-        if event is None:
-            self.log.info("Trigger error: event is None")
-            raise AirflowException("Trigger error: event is None")
-        elif event["status"] == "success":
+        event = validate_execute_complete_event(event)
+
+        if event["status"] == "success":
             self.log.info("EMR serverless application %s deleted successfully", self.application_id)
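Taken together, the EmrServerlessStartJobOperator changes above add opt-in UI links: enable_application_ui_links gates the one-time dashboard and Spark driver-log links, while S3 and CloudWatch console links follow from the job's monitoringConfiguration override (see is_monitoring_in_job_override and persist_links). A hedged usage sketch; the application id, role ARN and bucket names are placeholders, not values from the diff.

from airflow.providers.amazon.aws.operators.emr import EmrServerlessStartJobOperator

start_serverless_job = EmrServerlessStartJobOperator(
    task_id="start_serverless_spark_job",
    application_id="00f1abcdef2gh3ij",  # placeholder
    execution_role_arn="arn:aws:iam::111122223333:role/emr-serverless-job-role",  # placeholder
    job_driver={"sparkSubmit": {"entryPoint": "s3://my-bucket/app.py"}},
    configuration_overrides={
        "monitoringConfiguration": {
            # Drives the EmrServerlessS3LogsLink extra link added above.
            "s3MonitoringConfiguration": {"logUri": "s3://my-bucket/emr-serverless-logs/"},
        }
    },
    # New in 8.18.0: also persist one-time Spark/Tez UI and driver stdout links.
    enable_application_ui_links=True,
)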

airflow/providers/amazon/aws/operators/glue.py

@@ -20,7 +20,7 @@ from __future__ import annotations
 import os
 import urllib.parse
 from functools import cached_property
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, Any, Sequence
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
@@ -29,6 +29,7 @@ from airflow.providers.amazon.aws.hooks.glue import GlueJobHook
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
 from airflow.providers.amazon.aws.links.glue import GlueJobRunDetailsLink
 from airflow.providers.amazon.aws.triggers.glue import GlueJobCompleteTrigger
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -215,7 +216,9 @@ class GlueJobOperator(BaseOperator):
         self.log.info("AWS Glue Job: %s. Run Id: %s", self.job_name, self._job_run_id)
         return self._job_run_id
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error in glue job: {event}")
         return event["value"]

airflow/providers/amazon/aws/operators/glue_crawler.py

@@ -18,11 +18,12 @@
 from __future__ import annotations
 
 from functools import cached_property
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, Any, Sequence
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.triggers.glue_crawler import GlueCrawlerCompleteTrigger
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -107,7 +108,9 @@ class GlueCrawlerOperator(BaseOperator):
 
         return crawler_name
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error in glue crawl: {event}")
         return self.config["Name"]

airflow/providers/amazon/aws/operators/glue_databrew.py

@@ -18,12 +18,13 @@
 from __future__ import annotations
 
 from functools import cached_property
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, Any, Sequence
 
 from airflow.configuration import conf
 from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.glue_databrew import GlueDataBrewHook
 from airflow.providers.amazon.aws.triggers.glue_databrew import GlueDataBrewJobCompleteTrigger
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -101,7 +102,9 @@ class GlueDataBrewStartJobOperator(BaseOperator):
 
         return {"run_id": run_id}
 
-    def execute_complete(self, context: Context, event=None) -> dict[str, str]:
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> dict[str, str]:
+        event = validate_execute_complete_event(event)
+
         run_id = event.get("run_id", "")
         status = event.get("status", "")
 

airflow/providers/amazon/aws/operators/lambda_function.py

@@ -26,6 +26,7 @@ from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.lambda_function import LambdaHook
 from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
 from airflow.providers.amazon.aws.triggers.lambda_function import LambdaCreateFunctionCompleteTrigger
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
 
 if TYPE_CHECKING:
@@ -143,6 +144,8 @@ class LambdaCreateFunctionOperator(AwsBaseOperator[LambdaHook]):
         return response.get("FunctionArn")
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if not event or event["status"] != "success":
             raise AirflowException(f"Trigger error: event is {event}")
 