apache-airflow-providers-amazon 8.17.0rc2__py3-none-any.whl → 8.18.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. airflow/providers/amazon/__init__.py +1 -1
  2. airflow/providers/amazon/aws/auth_manager/cli/avp_commands.py +3 -3
  3. airflow/providers/amazon/aws/auth_manager/cli/definition.py +14 -0
  4. airflow/providers/amazon/aws/auth_manager/cli/idc_commands.py +148 -0
  5. airflow/providers/amazon/aws/auth_manager/views/auth.py +1 -1
  6. airflow/providers/amazon/aws/executors/ecs/Dockerfile +3 -3
  7. airflow/providers/amazon/aws/executors/ecs/boto_schema.py +1 -1
  8. airflow/providers/amazon/aws/executors/ecs/ecs_executor.py +40 -17
  9. airflow/providers/amazon/aws/executors/ecs/utils.py +9 -7
  10. airflow/providers/amazon/aws/executors/utils/exponential_backoff_retry.py +23 -4
  11. airflow/providers/amazon/aws/hooks/athena.py +15 -2
  12. airflow/providers/amazon/aws/hooks/base_aws.py +16 -14
  13. airflow/providers/amazon/aws/hooks/emr.py +6 -0
  14. airflow/providers/amazon/aws/hooks/logs.py +85 -1
  15. airflow/providers/amazon/aws/hooks/neptune.py +85 -0
  16. airflow/providers/amazon/aws/hooks/quicksight.py +9 -8
  17. airflow/providers/amazon/aws/hooks/redshift_cluster.py +8 -7
  18. airflow/providers/amazon/aws/hooks/redshift_sql.py +3 -3
  19. airflow/providers/amazon/aws/hooks/s3.py +4 -6
  20. airflow/providers/amazon/aws/hooks/sagemaker.py +136 -9
  21. airflow/providers/amazon/aws/links/emr.py +122 -2
  22. airflow/providers/amazon/aws/log/cloudwatch_task_handler.py +2 -2
  23. airflow/providers/amazon/aws/operators/athena.py +4 -1
  24. airflow/providers/amazon/aws/operators/batch.py +5 -6
  25. airflow/providers/amazon/aws/operators/ecs.py +6 -2
  26. airflow/providers/amazon/aws/operators/eks.py +31 -26
  27. airflow/providers/amazon/aws/operators/emr.py +192 -26
  28. airflow/providers/amazon/aws/operators/glue.py +5 -2
  29. airflow/providers/amazon/aws/operators/glue_crawler.py +5 -2
  30. airflow/providers/amazon/aws/operators/glue_databrew.py +5 -2
  31. airflow/providers/amazon/aws/operators/lambda_function.py +3 -0
  32. airflow/providers/amazon/aws/operators/neptune.py +218 -0
  33. airflow/providers/amazon/aws/operators/rds.py +21 -12
  34. airflow/providers/amazon/aws/operators/redshift_cluster.py +12 -18
  35. airflow/providers/amazon/aws/operators/redshift_data.py +2 -4
  36. airflow/providers/amazon/aws/operators/sagemaker.py +94 -31
  37. airflow/providers/amazon/aws/operators/step_function.py +4 -1
  38. airflow/providers/amazon/aws/sensors/batch.py +2 -2
  39. airflow/providers/amazon/aws/sensors/ec2.py +4 -2
  40. airflow/providers/amazon/aws/sensors/emr.py +13 -6
  41. airflow/providers/amazon/aws/sensors/glue_catalog_partition.py +4 -1
  42. airflow/providers/amazon/aws/sensors/quicksight.py +17 -14
  43. airflow/providers/amazon/aws/sensors/redshift_cluster.py +2 -4
  44. airflow/providers/amazon/aws/sensors/s3.py +3 -0
  45. airflow/providers/amazon/aws/sensors/sqs.py +4 -1
  46. airflow/providers/amazon/aws/transfers/s3_to_redshift.py +1 -0
  47. airflow/providers/amazon/aws/transfers/sql_to_s3.py +31 -3
  48. airflow/providers/amazon/aws/triggers/neptune.py +115 -0
  49. airflow/providers/amazon/aws/triggers/rds.py +9 -7
  50. airflow/providers/amazon/aws/triggers/redshift_cluster.py +2 -2
  51. airflow/providers/amazon/aws/triggers/redshift_data.py +1 -1
  52. airflow/providers/amazon/aws/triggers/sagemaker.py +82 -1
  53. airflow/providers/amazon/aws/utils/__init__.py +10 -0
  54. airflow/providers/amazon/aws/utils/connection_wrapper.py +12 -8
  55. airflow/providers/amazon/aws/utils/mixins.py +5 -1
  56. airflow/providers/amazon/aws/utils/task_log_fetcher.py +2 -2
  57. airflow/providers/amazon/aws/waiters/neptune.json +85 -0
  58. airflow/providers/amazon/get_provider_info.py +26 -2
  59. {apache_airflow_providers_amazon-8.17.0rc2.dist-info → apache_airflow_providers_amazon-8.18.0rc2.dist-info}/METADATA +6 -6
  60. {apache_airflow_providers_amazon-8.17.0rc2.dist-info → apache_airflow_providers_amazon-8.18.0rc2.dist-info}/RECORD +62 -57
  61. {apache_airflow_providers_amazon-8.17.0rc2.dist-info → apache_airflow_providers_amazon-8.18.0rc2.dist-info}/WHEEL +0 -0
  62. {apache_airflow_providers_amazon-8.17.0rc2.dist-info → apache_airflow_providers_amazon-8.18.0rc2.dist-info}/entry_points.txt +0 -0
@@ -44,7 +44,7 @@ from airflow.providers.amazon.aws.triggers.batch import (
     BatchCreateComputeEnvironmentTrigger,
     BatchJobTrigger,
 )
-from airflow.providers.amazon.aws.utils import trim_none_values
+from airflow.providers.amazon.aws.utils import trim_none_values, validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.task_log_fetcher import AwsTaskLogFetcher
 
 if TYPE_CHECKING:
@@ -269,10 +269,7 @@ class BatchOperator(BaseOperator):
         return self.job_id
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
-        if event is None:
-            err_msg = "Trigger error: event is None"
-            self.log.info(err_msg)
-            raise AirflowException(err_msg)
+        event = validate_execute_complete_event(event)
 
         if event["status"] != "success":
             raise AirflowException(f"Error while running job: {event}")
@@ -541,7 +538,9 @@ class BatchCreateComputeEnvironmentOperator(BaseOperator):
         self.log.info("AWS Batch compute environment created successfully")
         return arn
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error while waiting for the compute environment to be ready: {event}")
         return event["value"]
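Nearly every operator touched in this release applies the same refactor shown above: the per-operator "event is None" guard in execute_complete is replaced by a shared validate_execute_complete_event helper imported from airflow.providers.amazon.aws.utils (the +10 -0 change to utils/__init__.py in the file list). The helper's body is not part of this excerpt; a minimal sketch of what it likely does, inferred only from how it is called in these hunks:

    from __future__ import annotations

    from typing import Any

    from airflow.exceptions import AirflowException


    def validate_execute_complete_event(event: dict[str, Any] | None = None) -> dict[str, Any]:
        # Sketch only: the real helper lives in airflow/providers/amazon/aws/utils/__init__.py
        # and its exact wording may differ.
        if event is None:
            raise AirflowException("Trigger error: event is None")
        return event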
@@ -21,7 +21,7 @@ import re
 import warnings
 from datetime import timedelta
 from functools import cached_property
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, Any, Sequence
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -35,6 +35,7 @@ from airflow.providers.amazon.aws.triggers.ecs import (
     ClusterInactiveTrigger,
     TaskDoneTrigger,
 )
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.identifiers import generate_uuid
 from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
 from airflow.providers.amazon.aws.utils.task_log_fetcher import AwsTaskLogFetcher
@@ -580,7 +581,9 @@ class EcsRunTaskOperator(EcsBaseOperator):
         else:
             return None
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str | None:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error in task execution: {event}")
         self.arn = event["task_arn"]  # restore arn to its updated value, needed for next steps
@@ -596,6 +599,7 @@ class EcsRunTaskOperator(EcsBaseOperator):
         )
         if len(one_log["events"]) > 0:
             return one_log["events"][0]["message"]
+        return None
 
     def _after_execution(self):
         self._check_success_task()
@@ -25,6 +25,7 @@ from functools import cached_property
 from typing import TYPE_CHECKING, Any, List, Sequence, cast
 
 from botocore.exceptions import ClientError, WaiterError
+from deprecated import deprecated
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
@@ -38,6 +39,7 @@ from airflow.providers.amazon.aws.triggers.eks import (
     EksDeleteFargateProfileTrigger,
     EksDeleteNodegroupTrigger,
 )
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.waiter_with_logging import wait
 from airflow.providers.cncf.kubernetes.utils.pod_manager import OnFinishAction
 
@@ -263,13 +265,14 @@ class EksCreateClusterOperator(BaseOperator):
         return EksHook(aws_conn_id=self.aws_conn_id, region_name=self.region)
 
     @property
-    def eks_hook(self):
-        warnings.warn(
+    @deprecated(
+        reason=(
             "`eks_hook` property is deprecated and will be removed in the future. "
-            "Please use `hook` property instead.",
-            AirflowProviderDeprecationWarning,
-            stacklevel=2,
-        )
+            "Please use `hook` property instead."
+        ),
+        category=AirflowProviderDeprecationWarning,
+    )
+    def eks_hook(self):
         return self.hook
 
     def execute(self, context: Context):
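The hunk above swaps a hand-rolled warnings.warn call for the deprecated package's decorator, which wraps the property getter and emits the warning whenever the property is accessed. A self-contained sketch of the same pattern; the Example class and LegacyishWarning category are illustrative stand-ins, not part of the provider:

    import warnings

    from deprecated import deprecated


    class LegacyishWarning(DeprecationWarning):
        """Illustrative stand-in for AirflowProviderDeprecationWarning."""


    class Example:
        @property
        def hook(self):
            return "the real hook"

        @property
        @deprecated(reason="Use `hook` instead.", category=LegacyishWarning)
        def eks_hook(self):
            # Still returns the real value, but access now emits a warning first.
            return self.hook


    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        assert Example().eks_hook == "the real hook"
        assert issubclass(caught[0].category, LegacyishWarning)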
@@ -419,11 +422,10 @@ class EksCreateClusterOperator(BaseOperator):
             raise AirflowException("Error creating cluster")
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
         resource = "fargate profile" if self.compute == "fargate" else self.compute
-        if event is None:
-            self.log.info("Trigger error: event is None")
-            raise AirflowException("Trigger error: event is None")
-        elif event["status"] != "success":
+        if event["status"] != "success":
             raise AirflowException(f"Error creating {resource}: {event}")
 
         self.log.info("%s created successfully", resource)
@@ -545,10 +547,11 @@ class EksCreateNodegroupOperator(BaseOperator):
             timeout=timedelta(seconds=self.waiter_max_attempts * self.waiter_delay + 60),
         )
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error creating nodegroup: {event}")
-        return
 
 
 class EksCreateFargateProfileOperator(BaseOperator):
@@ -654,12 +657,13 @@ class EksCreateFargateProfileOperator(BaseOperator):
             timeout=timedelta(seconds=(self.waiter_max_attempts * self.waiter_delay + 60)),
         )
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error creating Fargate profile: {event}")
-        else:
-            self.log.info("Fargate profile created successfully")
-        return
+
+        self.log.info("Fargate profile created successfully")
 
 
 class EksDeleteClusterOperator(BaseOperator):
@@ -786,10 +790,9 @@ class EksDeleteClusterOperator(BaseOperator):
         self.log.info(SUCCESS_MSG.format(compute=FARGATE_FULL_NAME))
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
-        if event is None:
-            self.log.error("Trigger error. Event is None")
-            raise AirflowException("Trigger error. Event is None")
-        elif event["status"] == "success":
+        event = validate_execute_complete_event(event)
+
+        if event["status"] == "success":
             self.log.info("Cluster deleted successfully.")
 
 
@@ -877,10 +880,11 @@ class EksDeleteNodegroupOperator(BaseOperator):
             clusterName=self.cluster_name, nodegroupName=self.nodegroup_name
         )
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error deleting nodegroup: {event}")
-        return
 
 
 class EksDeleteFargateProfileOperator(BaseOperator):
@@ -970,12 +974,13 @@ class EksDeleteFargateProfileOperator(BaseOperator):
             WaiterConfig={"Delay": self.waiter_delay, "MaxAttempts": self.waiter_max_attempts},
         )
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error deleting Fargate profile: {event}")
-        else:
-            self.log.info("Fargate profile deleted successfully")
-        return
+
+        self.log.info("Fargate profile deleted successfully")
 
 
 class EksPodOperator(KubernetesPodOperator):
@@ -27,8 +27,17 @@ from uuid import uuid4
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator
+from airflow.models.mappedoperator import MappedOperator
 from airflow.providers.amazon.aws.hooks.emr import EmrContainerHook, EmrHook, EmrServerlessHook
-from airflow.providers.amazon.aws.links.emr import EmrClusterLink, EmrLogsLink, get_log_uri
+from airflow.providers.amazon.aws.links.emr import (
+    EmrClusterLink,
+    EmrLogsLink,
+    EmrServerlessCloudWatchLogsLink,
+    EmrServerlessDashboardLink,
+    EmrServerlessLogsLink,
+    EmrServerlessS3LogsLink,
+    get_log_uri,
+)
 from airflow.providers.amazon.aws.triggers.emr import (
     EmrAddStepsTrigger,
     EmrContainerTrigger,
@@ -41,6 +50,7 @@ from airflow.providers.amazon.aws.triggers.emr import (
     EmrServerlessStopApplicationTrigger,
     EmrTerminateJobFlowTrigger,
 )
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.waiter import waiter
 from airflow.providers.amazon.aws.utils.waiter_with_logging import wait
 from airflow.utils.helpers import exactly_one, prune_dict
@@ -180,11 +190,13 @@ class EmrAddStepsOperator(BaseOperator):
 
         return step_ids
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error while running steps: {event}")
-        else:
-            self.log.info("Steps completed successfully")
+
+        self.log.info("Steps completed successfully")
         return event["value"]
 
 
@@ -494,6 +506,8 @@ class EmrContainerOperator(BaseOperator):
     :param max_tries: Deprecated - use max_polling_attempts instead.
     :param max_polling_attempts: Maximum number of times to wait for the job run to finish.
         Defaults to None, which will poll until the job is *not* in a pending, submitted, or running state.
+    :param job_retry_max_attempts: Maximum number of times to retry when the EMR job fails.
+        Defaults to None, which disable the retry.
     :param tags: The tags assigned to job runs.
         Defaults to None
     :param deferrable: Run operator in the deferrable mode.
@@ -525,6 +539,7 @@ class EmrContainerOperator(BaseOperator):
         max_tries: int | None = None,
         tags: dict | None = None,
         max_polling_attempts: int | None = None,
+        job_retry_max_attempts: int | None = None,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
         **kwargs: Any,
     ) -> None:
@@ -540,6 +555,7 @@ class EmrContainerOperator(BaseOperator):
         self.wait_for_completion = wait_for_completion
         self.poll_interval = poll_interval
         self.max_polling_attempts = max_polling_attempts
+        self.job_retry_max_attempts = job_retry_max_attempts
         self.tags = tags
         self.job_id: str | None = None
         self.deferrable = deferrable
@@ -574,6 +590,7 @@ class EmrContainerOperator(BaseOperator):
             self.configuration_overrides,
             self.client_request_token,
             self.tags,
+            self.job_retry_max_attempts,
         )
         if self.deferrable:
             query_status = self.hook.check_query_status(job_id=self.job_id)
@@ -619,7 +636,9 @@ class EmrContainerOperator(BaseOperator):
                 f"query_execution_id is {self.job_id}. Error: {error_message}"
             )
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error while running job: {event}")
 
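The new job_retry_max_attempts value is simply appended to the hook's submit_job_run arguments, so it ends up in the EMR on EKS job run's retry policy. A hedged usage sketch; every identifier below (task id, job name, virtual cluster id, role ARN, S3 path) is a placeholder, and the task is assumed to sit inside a DAG definition:

    from airflow.providers.amazon.aws.operators.emr import EmrContainerOperator

    run_spark_job = EmrContainerOperator(
        task_id="run_spark_job",
        name="sample-job-run",
        virtual_cluster_id="abc123virtualclusterid",
        execution_role_arn="arn:aws:iam::123456789012:role/emr-eks-job-role",
        release_label="emr-6.15.0-latest",
        job_driver={
            "sparkSubmitJobDriver": {"entryPoint": "s3://my-bucket/scripts/job.py"},
        },
        # New in 8.18.0: let EMR on EKS retry the job run up to 3 times if it fails.
        job_retry_max_attempts=3,
    )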
@@ -806,11 +825,13 @@ class EmrCreateJobFlowOperator(BaseOperator):
         )
         return self._job_flow_id
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error creating jobFlow: {event}")
-        else:
-            self.log.info("JobFlow created successfully")
+
+        self.log.info("JobFlow created successfully")
         return event["job_flow_id"]
 
     def on_kill(self) -> None:
@@ -969,12 +990,13 @@ class EmrTerminateJobFlowOperator(BaseOperator):
             timeout=timedelta(seconds=self.waiter_max_attempts * self.waiter_delay + 60),
         )
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error terminating JobFlow: {event}")
-        else:
-            self.log.info("Jobflow terminated successfully.")
-        return
+
+        self.log.info("Jobflow terminated successfully.")
 
 
 class EmrServerlessCreateApplicationOperator(BaseOperator):
@@ -1135,7 +1157,9 @@ class EmrServerlessCreateApplicationOperator(BaseOperator):
         )
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
-        if event is None or event["status"] != "success":
+        event = validate_execute_complete_event(event)
+
+        if event["status"] != "success":
             raise AirflowException(f"Trigger error: Application failed to start, event is {event}")
 
         self.log.info("Application %s started", event["application_id"])
@@ -1172,6 +1196,9 @@ class EmrServerlessStartJobOperator(BaseOperator):
     :param deferrable: If True, the operator will wait asynchronously for the crawl to complete.
         This implies waiting for completion. This mode requires aiobotocore module to be installed.
         (default: False, but can be overridden in config file by setting default_deferrable to True)
+    :param enable_application_ui_links: If True, the operator will generate one-time links to EMR Serverless
+        application UIs. The generated links will allow any user with access to the DAG to see the Spark or
+        Tez UI or Spark stdout logs. Defaults to False.
     """
 
     template_fields: Sequence[str] = (
@@ -1181,6 +1208,7 @@ class EmrServerlessStartJobOperator(BaseOperator):
         "job_driver",
         "configuration_overrides",
         "name",
+        "aws_conn_id",
     )
 
     template_fields_renderers = {
@@ -1188,12 +1216,48 @@ class EmrServerlessStartJobOperator(BaseOperator):
         "configuration_overrides": "json",
     }
 
+    @property
+    def operator_extra_links(self):
+        """
+        Dynamically add extra links depending on the job type and if they're enabled.
+
+        If S3 or CloudWatch monitoring configurations exist, add links directly to the relevant consoles.
+        Only add dashboard links if they're explicitly enabled. These are one-time links that any user
+        can access, but expire on first click or one hour, whichever comes first.
+        """
+        op_extra_links = []
+
+        if isinstance(self, MappedOperator):
+            enable_application_ui_links = self.partial_kwargs.get(
+                "enable_application_ui_links"
+            ) or self.expand_input.value.get("enable_application_ui_links")
+            job_driver = self.partial_kwargs.get("job_driver") or self.expand_input.value.get("job_driver")
+            configuration_overrides = self.partial_kwargs.get(
+                "configuration_overrides"
+            ) or self.expand_input.value.get("configuration_overrides")
+
+        else:
+            enable_application_ui_links = self.enable_application_ui_links
+            configuration_overrides = self.configuration_overrides
+            job_driver = self.job_driver
+
+        if enable_application_ui_links:
+            op_extra_links.extend([EmrServerlessDashboardLink()])
+            if "sparkSubmit" in job_driver:
+                op_extra_links.extend([EmrServerlessLogsLink()])
+        if self.is_monitoring_in_job_override("s3MonitoringConfiguration", configuration_overrides):
+            op_extra_links.extend([EmrServerlessS3LogsLink()])
+        if self.is_monitoring_in_job_override("cloudWatchLoggingConfiguration", configuration_overrides):
+            op_extra_links.extend([EmrServerlessCloudWatchLogsLink()])
+
+        return tuple(op_extra_links)
+
     def __init__(
         self,
         application_id: str,
         execution_role_arn: str,
         job_driver: dict,
-        configuration_overrides: dict | None,
+        configuration_overrides: dict | None = None,
         client_request_token: str = "",
         config: dict | None = None,
         wait_for_completion: bool = True,
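The operator_extra_links property above special-cases dynamically mapped tasks, where the operator at link-resolution time is a MappedOperator and its constructor arguments live in partial_kwargs / expand_input rather than on instance attributes. A hedged sketch of DAG code that exercises that branch; it assumes a surrounding DAG definition, and the ids, role ARN and script paths are illustrative:

    from airflow.providers.amazon.aws.operators.emr import EmrServerlessStartJobOperator

    start_jobs = EmrServerlessStartJobOperator.partial(
        task_id="start_serverless_job",
        application_id="00example123456789",
        execution_role_arn="arn:aws:iam::123456789012:role/emr-serverless-job-role",
        # Read back from partial_kwargs when the extra links are resolved.
        enable_application_ui_links=True,
    ).expand(
        # Read back from expand_input; one mapped task instance per job driver.
        job_driver=[
            {"sparkSubmit": {"entryPoint": "s3://my-bucket/scripts/job_a.py"}},
            {"sparkSubmit": {"entryPoint": "s3://my-bucket/scripts/job_b.py"}},
        ]
    )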
@@ -1204,6 +1268,7 @@ class EmrServerlessStartJobOperator(BaseOperator):
         waiter_max_attempts: int | ArgNotSet = NOTSET,
         waiter_delay: int | ArgNotSet = NOTSET,
         deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
+        enable_application_ui_links: bool = False,
         **kwargs,
     ):
         if waiter_check_interval_seconds is NOTSET:
@@ -1243,6 +1308,7 @@ class EmrServerlessStartJobOperator(BaseOperator):
         self.waiter_delay = int(waiter_delay)  # type: ignore[arg-type]
         self.job_id: str | None = None
         self.deferrable = deferrable
+        self.enable_application_ui_links = enable_application_ui_links
         super().__init__(**kwargs)
 
         self.client_request_token = client_request_token or str(uuid4())
@@ -1300,6 +1366,9 @@ class EmrServerlessStartJobOperator(BaseOperator):
 
         self.job_id = response["jobRunId"]
         self.log.info("EMR serverless job started: %s", self.job_id)
+
+        self.persist_links(context)
+
         if self.deferrable:
             self.defer(
                 trigger=EmrServerlessStartJobTrigger(
@@ -1312,6 +1381,7 @@ class EmrServerlessStartJobOperator(BaseOperator):
                 method_name="execute_complete",
                 timeout=timedelta(seconds=self.waiter_max_attempts * self.waiter_delay),
             )
+
         if self.wait_for_completion:
             waiter = self.hook.get_waiter("serverless_job_completed")
             wait(
@@ -1327,10 +1397,9 @@ class EmrServerlessStartJobOperator(BaseOperator):
         return self.job_id
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
-        if event is None:
-            self.log.error("Trigger error: event is None")
-            raise AirflowException("Trigger error: event is None")
-        elif event["status"] == "success":
+        event = validate_execute_complete_event(event)
+
+        if event["status"] == "success":
             self.log.info("Serverless job completed")
             return event["job_id"]
 
@@ -1369,6 +1438,105 @@ class EmrServerlessStartJobOperator(BaseOperator):
             check_interval_seconds=self.waiter_delay,
         )
 
+    def is_monitoring_in_job_override(self, config_key: str, job_override: dict | None) -> bool:
+        """
+        Check if monitoring is enabled for the job.
+
+        Note: This is not compatible with application defaults:
+        https://docs.aws.amazon.com/emr/latest/EMR-Serverless-UserGuide/default-configs.html
+
+        This is used to determine what extra links should be shown.
+        """
+        monitoring_config = (job_override or {}).get("monitoringConfiguration")
+        if monitoring_config is None or config_key not in monitoring_config:
+            return False
+
+        # CloudWatch can have an "enabled" flag set to False
+        if config_key == "cloudWatchLoggingConfiguration":
+            return monitoring_config.get(config_key).get("enabled") is True
+
+        return config_key in monitoring_config
+
+    def persist_links(self, context: Context):
+        """Populate the relevant extra links for the EMR Serverless jobs."""
+        # Persist the EMR Serverless Dashboard link (Spark/Tez UI)
+        if self.enable_application_ui_links:
+            EmrServerlessDashboardLink.persist(
+                context=context,
+                operator=self,
+                region_name=self.hook.conn_region_name,
+                aws_partition=self.hook.conn_partition,
+                conn_id=self.hook.aws_conn_id,
+                application_id=self.application_id,
+                job_run_id=self.job_id,
+            )
+
+        # If this is a Spark job, persist the EMR Serverless logs link (Driver stdout)
+        if self.enable_application_ui_links and "sparkSubmit" in self.job_driver:
+            EmrServerlessLogsLink.persist(
+                context=context,
+                operator=self,
+                region_name=self.hook.conn_region_name,
+                aws_partition=self.hook.conn_partition,
+                conn_id=self.hook.aws_conn_id,
+                application_id=self.application_id,
+                job_run_id=self.job_id,
+            )
+
+        # Add S3 and/or CloudWatch links if either is enabled
+        if self.is_monitoring_in_job_override("s3MonitoringConfiguration", self.configuration_overrides):
+            log_uri = (
+                (self.configuration_overrides or {})
+                .get("monitoringConfiguration", {})
+                .get("s3MonitoringConfiguration", {})
+                .get("logUri")
+            )
+            EmrServerlessS3LogsLink.persist(
+                context=context,
+                operator=self,
+                region_name=self.hook.conn_region_name,
+                aws_partition=self.hook.conn_partition,
+                log_uri=log_uri,
+                application_id=self.application_id,
+                job_run_id=self.job_id,
+            )
+            emrs_s3_url = EmrServerlessS3LogsLink().format_link(
+                aws_domain=EmrServerlessCloudWatchLogsLink.get_aws_domain(self.hook.conn_partition),
+                region_name=self.hook.conn_region_name,
+                aws_partition=self.hook.conn_partition,
+                log_uri=log_uri,
+                application_id=self.application_id,
+                job_run_id=self.job_id,
+            )
+            self.log.info("S3 logs available at: %s", emrs_s3_url)
+
+        if self.is_monitoring_in_job_override("cloudWatchLoggingConfiguration", self.configuration_overrides):
+            cloudwatch_config = (
+                (self.configuration_overrides or {})
+                .get("monitoringConfiguration", {})
+                .get("cloudWatchLoggingConfiguration", {})
+            )
+            log_group_name = cloudwatch_config.get("logGroupName", "/aws/emr-serverless")
+            log_stream_prefix = cloudwatch_config.get("logStreamNamePrefix", "")
+            log_stream_prefix = f"{log_stream_prefix}/applications/{self.application_id}/jobs/{self.job_id}"
+
+            EmrServerlessCloudWatchLogsLink.persist(
+                context=context,
+                operator=self,
+                region_name=self.hook.conn_region_name,
+                aws_partition=self.hook.conn_partition,
+                awslogs_group=log_group_name,
+                stream_prefix=log_stream_prefix,
+            )
+            emrs_cloudwatch_url = EmrServerlessCloudWatchLogsLink().format_link(
+                aws_domain=EmrServerlessCloudWatchLogsLink.get_aws_domain(self.hook.conn_partition),
+                region_name=self.hook.conn_region_name,
+                aws_partition=self.hook.conn_partition,
+                awslogs_group=log_group_name,
+                stream_prefix=log_stream_prefix,
+            )
+            self.log.info("CloudWatch logs available at: %s", emrs_cloudwatch_url)
+
 
 class EmrServerlessStopApplicationOperator(BaseOperator):
     """
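Taken together, the four new links only show up when the job driver and monitoring configuration actually make them resolvable. A hedged example of a non-mapped task that would surface all of them; the bucket, role and application id are placeholders, and the task is assumed to live inside a DAG definition:

    from airflow.providers.amazon.aws.operators.emr import EmrServerlessStartJobOperator

    start_job = EmrServerlessStartJobOperator(
        task_id="start_spark_job",
        application_id="00example123456789",
        execution_role_arn="arn:aws:iam::123456789012:role/emr-serverless-job-role",
        # "sparkSubmit" in the driver enables EmrServerlessLogsLink (driver stdout).
        job_driver={"sparkSubmit": {"entryPoint": "s3://my-bucket/scripts/job.py"}},
        configuration_overrides={
            "monitoringConfiguration": {
                # Enables EmrServerlessS3LogsLink.
                "s3MonitoringConfiguration": {"logUri": "s3://my-bucket/emr-serverless-logs/"},
                # Enables EmrServerlessCloudWatchLogsLink; "enabled" must be True.
                "cloudWatchLoggingConfiguration": {"enabled": True},
            }
        },
        # Enables EmrServerlessDashboardLink (one-time Spark/Tez UI links).
        enable_application_ui_links=True,
    )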
@@ -1527,10 +1695,9 @@ class EmrServerlessStopApplicationOperator(BaseOperator):
         )
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
-        if event is None:
-            self.log.error("Trigger error: event is None")
-            raise AirflowException("Trigger error: event is None")
-        elif event["status"] == "success":
+        event = validate_execute_complete_event(event)
+
+        if event["status"] == "success":
             self.log.info("EMR serverless application %s stopped successfully", self.application_id)
 
 
@@ -1656,8 +1823,7 @@ class EmrServerlessDeleteApplicationOperator(EmrServerlessStopApplicationOperato
         self.log.info("EMR serverless application deleted")
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
-        if event is None:
-            self.log.error("Trigger error: event is None")
-            raise AirflowException("Trigger error: event is None")
-        elif event["status"] == "success":
+        event = validate_execute_complete_event(event)
+
+        if event["status"] == "success":
             self.log.info("EMR serverless application %s deleted successfully", self.application_id)
@@ -20,7 +20,7 @@ from __future__ import annotations
 import os
 import urllib.parse
 from functools import cached_property
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, Any, Sequence
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
@@ -29,6 +29,7 @@ from airflow.providers.amazon.aws.hooks.glue import GlueJobHook
 from airflow.providers.amazon.aws.hooks.s3 import S3Hook
 from airflow.providers.amazon.aws.links.glue import GlueJobRunDetailsLink
 from airflow.providers.amazon.aws.triggers.glue import GlueJobCompleteTrigger
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -215,7 +216,9 @@ class GlueJobOperator(BaseOperator):
         self.log.info("AWS Glue Job: %s. Run Id: %s", self.job_name, self._job_run_id)
         return self._job_run_id
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error in glue job: {event}")
         return event["value"]
@@ -18,11 +18,12 @@
 from __future__ import annotations
 
 from functools import cached_property
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, Any, Sequence
 
 from airflow.configuration import conf
 from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.triggers.glue_crawler import GlueCrawlerCompleteTrigger
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -107,7 +108,9 @@ class GlueCrawlerOperator(BaseOperator):
 
         return crawler_name
 
-    def execute_complete(self, context, event=None):
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if event["status"] != "success":
             raise AirflowException(f"Error in glue crawl: {event}")
         return self.config["Name"]
@@ -18,12 +18,13 @@
 from __future__ import annotations
 
 from functools import cached_property
-from typing import TYPE_CHECKING, Sequence
+from typing import TYPE_CHECKING, Any, Sequence
 
 from airflow.configuration import conf
 from airflow.models import BaseOperator
 from airflow.providers.amazon.aws.hooks.glue_databrew import GlueDataBrewHook
 from airflow.providers.amazon.aws.triggers.glue_databrew import GlueDataBrewJobCompleteTrigger
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 
 if TYPE_CHECKING:
     from airflow.utils.context import Context
@@ -101,7 +102,9 @@ class GlueDataBrewStartJobOperator(BaseOperator):
 
         return {"run_id": run_id}
 
-    def execute_complete(self, context: Context, event=None) -> dict[str, str]:
+    def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> dict[str, str]:
+        event = validate_execute_complete_event(event)
+
         run_id = event.get("run_id", "")
         status = event.get("status", "")
 
@@ -26,6 +26,7 @@ from airflow.exceptions import AirflowException
 from airflow.providers.amazon.aws.hooks.lambda_function import LambdaHook
 from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
 from airflow.providers.amazon.aws.triggers.lambda_function import LambdaCreateFunctionCompleteTrigger
+from airflow.providers.amazon.aws.utils import validate_execute_complete_event
 from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
 
 if TYPE_CHECKING:
@@ -143,6 +144,8 @@ class LambdaCreateFunctionOperator(AwsBaseOperator[LambdaHook]):
         return response.get("FunctionArn")
 
     def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> str:
+        event = validate_execute_complete_event(event)
+
         if not event or event["status"] != "success":
             raise AirflowException(f"Trigger error: event is {event}")
 