apache-airflow-providers-amazon 9.6.1rc1__py3-none-any.whl → 9.7.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/amazon/__init__.py +3 -3
- airflow/providers/amazon/aws/auth_manager/avp/entities.py +1 -1
- airflow/providers/amazon/aws/auth_manager/avp/schema.json +33 -7
- airflow/providers/amazon/aws/auth_manager/aws_auth_manager.py +8 -5
- airflow/providers/amazon/aws/auth_manager/cli/avp_commands.py +6 -9
- airflow/providers/amazon/aws/auth_manager/cli/definition.py +2 -12
- airflow/providers/amazon/aws/auth_manager/datamodels/login.py +26 -0
- airflow/providers/amazon/aws/auth_manager/routes/__init__.py +16 -0
- airflow/providers/amazon/aws/auth_manager/{router → routes}/login.py +29 -10
- airflow/providers/amazon/aws/executors/batch/batch_executor.py +1 -5
- airflow/providers/amazon/aws/executors/ecs/ecs_executor.py +1 -6
- airflow/providers/amazon/aws/hooks/redshift_sql.py +1 -4
- airflow/providers/amazon/aws/operators/emr.py +147 -142
- airflow/providers/amazon/aws/operators/glue.py +56 -48
- airflow/providers/amazon/aws/queues/__init__.py +16 -0
- airflow/providers/amazon/aws/queues/sqs.py +52 -0
- airflow/providers/amazon/aws/sensors/emr.py +49 -52
- airflow/providers/amazon/get_provider_info.py +2 -7
- airflow/providers/amazon/version_compat.py +0 -1
- {apache_airflow_providers_amazon-9.6.1rc1.dist-info → apache_airflow_providers_amazon-9.7.0rc1.dist-info}/METADATA +37 -30
- {apache_airflow_providers_amazon-9.6.1rc1.dist-info → apache_airflow_providers_amazon-9.7.0rc1.dist-info}/RECORD +24 -20
- /airflow/providers/amazon/aws/auth_manager/{router → datamodels}/__init__.py +0 -0
- {apache_airflow_providers_amazon-9.6.1rc1.dist-info → apache_airflow_providers_amazon-9.7.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_amazon-9.6.1rc1.dist-info → apache_airflow_providers_amazon-9.7.0rc1.dist-info}/entry_points.txt +0 -0
@@ -21,13 +21,11 @@ import ast
|
|
21
21
|
import warnings
|
22
22
|
from collections.abc import Sequence
|
23
23
|
from datetime import timedelta
|
24
|
-
from functools import cached_property
|
25
24
|
from typing import TYPE_CHECKING, Any
|
26
25
|
from uuid import uuid4
|
27
26
|
|
28
27
|
from airflow.configuration import conf
|
29
28
|
from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
|
30
|
-
from airflow.models import BaseOperator
|
31
29
|
from airflow.providers.amazon.aws.hooks.emr import EmrContainerHook, EmrHook, EmrServerlessHook
|
32
30
|
from airflow.providers.amazon.aws.links.emr import (
|
33
31
|
EmrClusterLink,
|
@@ -38,6 +36,7 @@ from airflow.providers.amazon.aws.links.emr import (
|
|
38
36
|
EmrServerlessS3LogsLink,
|
39
37
|
get_log_uri,
|
40
38
|
)
|
39
|
+
from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator
|
41
40
|
from airflow.providers.amazon.aws.triggers.emr import (
|
42
41
|
EmrAddStepsTrigger,
|
43
42
|
EmrContainerTrigger,
|
@@ -51,6 +50,7 @@ from airflow.providers.amazon.aws.triggers.emr import (
|
|
51
50
|
EmrTerminateJobFlowTrigger,
|
52
51
|
)
|
53
52
|
from airflow.providers.amazon.aws.utils import validate_execute_complete_event
|
53
|
+
from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
|
54
54
|
from airflow.providers.amazon.aws.utils.waiter import (
|
55
55
|
WAITER_POLICY_NAME_MAPPING,
|
56
56
|
WaitPolicy,
|
@@ -64,7 +64,7 @@ if TYPE_CHECKING:
|
|
64
64
|
from airflow.utils.context import Context
|
65
65
|
|
66
66
|
|
67
|
-
class EmrAddStepsOperator(
|
67
|
+
class EmrAddStepsOperator(AwsBaseOperator[EmrHook]):
|
68
68
|
"""
|
69
69
|
An operator that adds steps to an existing EMR job_flow.
|
70
70
|
|
@@ -79,10 +79,13 @@ class EmrAddStepsOperator(BaseOperator):
|
|
79
79
|
:param cluster_states: Acceptable cluster states when searching for JobFlow id by job_flow_name.
|
80
80
|
(templated)
|
81
81
|
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
82
|
-
If this is None or empty then the default boto3 behaviour is used. If
|
82
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
83
83
|
running Airflow in a distributed manner and aws_conn_id is None or
|
84
84
|
empty, then default boto3 configuration would be used (and must be
|
85
85
|
maintained on each worker node).
|
86
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
87
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
88
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
86
89
|
:param steps: boto3 style steps or reference to a steps file (must be '.json') to
|
87
90
|
be added to the jobflow. (templated)
|
88
91
|
:param wait_for_completion: If True, the operator will wait for all the steps to be completed.
|
@@ -94,7 +97,8 @@ class EmrAddStepsOperator(BaseOperator):
|
|
94
97
|
(default: False)
|
95
98
|
"""
|
96
99
|
|
97
|
-
|
100
|
+
aws_hook_class = EmrHook
|
101
|
+
template_fields: Sequence[str] = aws_template_fields(
|
98
102
|
"job_flow_id",
|
99
103
|
"job_flow_name",
|
100
104
|
"cluster_states",
|
@@ -115,7 +119,6 @@ class EmrAddStepsOperator(BaseOperator):
|
|
115
119
|
job_flow_id: str | None = None,
|
116
120
|
job_flow_name: str | None = None,
|
117
121
|
cluster_states: list[str] | None = None,
|
118
|
-
aws_conn_id: str | None = "aws_default",
|
119
122
|
steps: list[dict] | str | None = None,
|
120
123
|
wait_for_completion: bool = False,
|
121
124
|
waiter_delay: int = 30,
|
@@ -129,7 +132,6 @@ class EmrAddStepsOperator(BaseOperator):
|
|
129
132
|
super().__init__(**kwargs)
|
130
133
|
cluster_states = cluster_states or []
|
131
134
|
steps = steps or []
|
132
|
-
self.aws_conn_id = aws_conn_id
|
133
135
|
self.job_flow_id = job_flow_id
|
134
136
|
self.job_flow_name = job_flow_name
|
135
137
|
self.cluster_states = cluster_states
|
@@ -141,9 +143,7 @@ class EmrAddStepsOperator(BaseOperator):
|
|
141
143
|
self.deferrable = deferrable
|
142
144
|
|
143
145
|
def execute(self, context: Context) -> list[str]:
|
144
|
-
|
145
|
-
|
146
|
-
job_flow_id = self.job_flow_id or emr_hook.get_cluster_id_by_name(
|
146
|
+
job_flow_id = self.job_flow_id or self.hook.get_cluster_id_by_name(
|
147
147
|
str(self.job_flow_name), self.cluster_states
|
148
148
|
)
|
149
149
|
|
@@ -156,17 +156,17 @@ class EmrAddStepsOperator(BaseOperator):
|
|
156
156
|
EmrClusterLink.persist(
|
157
157
|
context=context,
|
158
158
|
operator=self,
|
159
|
-
region_name=
|
160
|
-
aws_partition=
|
159
|
+
region_name=self.hook.conn_region_name,
|
160
|
+
aws_partition=self.hook.conn_partition,
|
161
161
|
job_flow_id=job_flow_id,
|
162
162
|
)
|
163
163
|
EmrLogsLink.persist(
|
164
164
|
context=context,
|
165
165
|
operator=self,
|
166
|
-
region_name=
|
167
|
-
aws_partition=
|
166
|
+
region_name=self.hook.conn_region_name,
|
167
|
+
aws_partition=self.hook.conn_partition,
|
168
168
|
job_flow_id=self.job_flow_id,
|
169
|
-
log_uri=get_log_uri(emr_client=
|
169
|
+
log_uri=get_log_uri(emr_client=self.hook.conn, job_flow_id=job_flow_id),
|
170
170
|
)
|
171
171
|
|
172
172
|
self.log.info("Adding steps to %s", job_flow_id)
|
@@ -176,7 +176,7 @@ class EmrAddStepsOperator(BaseOperator):
|
|
176
176
|
steps = self.steps
|
177
177
|
if isinstance(steps, str):
|
178
178
|
steps = ast.literal_eval(steps)
|
179
|
-
step_ids =
|
179
|
+
step_ids = self.hook.add_job_flow_steps(
|
180
180
|
job_flow_id=job_flow_id,
|
181
181
|
steps=steps,
|
182
182
|
wait_for_completion=self.wait_for_completion,
|
@@ -208,7 +208,7 @@ class EmrAddStepsOperator(BaseOperator):
|
|
208
208
|
return validated_event["value"]
|
209
209
|
|
210
210
|
|
211
|
-
class EmrStartNotebookExecutionOperator(
|
211
|
+
class EmrStartNotebookExecutionOperator(AwsBaseOperator[EmrHook]):
|
212
212
|
"""
|
213
213
|
An operator that starts an EMR notebook execution.
|
214
214
|
|
@@ -232,9 +232,18 @@ class EmrStartNotebookExecutionOperator(BaseOperator):
|
|
232
232
|
:param tags: Optional list of key value pair to associate with the notebook execution.
|
233
233
|
:param waiter_max_attempts: Maximum number of tries before failing.
|
234
234
|
:param waiter_delay: Number of seconds between polling the state of the notebook.
|
235
|
+
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
236
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
237
|
+
running Airflow in a distributed manner and aws_conn_id is None or
|
238
|
+
empty, then default boto3 configuration would be used (and must be
|
239
|
+
maintained on each worker node).
|
240
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
241
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
242
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
235
243
|
"""
|
236
244
|
|
237
|
-
|
245
|
+
aws_hook_class = EmrHook
|
246
|
+
template_fields: Sequence[str] = aws_template_fields(
|
238
247
|
"editor_id",
|
239
248
|
"cluster_id",
|
240
249
|
"relative_path",
|
@@ -260,7 +269,6 @@ class EmrStartNotebookExecutionOperator(BaseOperator):
|
|
260
269
|
master_instance_security_group_id: str | None = None,
|
261
270
|
tags: list | None = None,
|
262
271
|
wait_for_completion: bool = False,
|
263
|
-
aws_conn_id: str | None = "aws_default",
|
264
272
|
waiter_max_attempts: int | None = None,
|
265
273
|
waiter_delay: int | None = None,
|
266
274
|
**kwargs: Any,
|
@@ -275,7 +283,6 @@ class EmrStartNotebookExecutionOperator(BaseOperator):
|
|
275
283
|
self.tags = tags or []
|
276
284
|
self.wait_for_completion = wait_for_completion
|
277
285
|
self.cluster_id = cluster_id
|
278
|
-
self.aws_conn_id = aws_conn_id
|
279
286
|
self.waiter_max_attempts = waiter_max_attempts or 25
|
280
287
|
self.waiter_delay = waiter_delay or 60
|
281
288
|
self.master_instance_security_group_id = master_instance_security_group_id
|
@@ -286,9 +293,8 @@ class EmrStartNotebookExecutionOperator(BaseOperator):
|
|
286
293
|
"Type": "EMR",
|
287
294
|
"MasterInstanceSecurityGroupId": self.master_instance_security_group_id or "",
|
288
295
|
}
|
289
|
-
emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
|
290
296
|
|
291
|
-
response =
|
297
|
+
response = self.hook.conn.start_notebook_execution(
|
292
298
|
EditorId=self.editor_id,
|
293
299
|
RelativePath=self.relative_path,
|
294
300
|
NotebookExecutionName=self.notebook_execution_name,
|
@@ -305,7 +311,7 @@ class EmrStartNotebookExecutionOperator(BaseOperator):
|
|
305
311
|
self.log.info("Notebook execution started: %s", response["NotebookExecutionId"])
|
306
312
|
notebook_execution_id = response["NotebookExecutionId"]
|
307
313
|
if self.wait_for_completion:
|
308
|
-
|
314
|
+
self.hook.get_waiter("notebook_running").wait(
|
309
315
|
NotebookExecutionId=notebook_execution_id,
|
310
316
|
WaiterConfig=prune_dict(
|
311
317
|
{
|
@@ -319,7 +325,7 @@ class EmrStartNotebookExecutionOperator(BaseOperator):
|
|
319
325
|
# failed, adding that here. This could maybe be deprecated
|
320
326
|
# later to bring it in line with how other waiters behave.
|
321
327
|
failure_states = {"FAILED"}
|
322
|
-
final_status =
|
328
|
+
final_status = self.hook.conn.describe_notebook_execution(
|
323
329
|
NotebookExecutionId=notebook_execution_id
|
324
330
|
)["NotebookExecution"]["Status"]
|
325
331
|
if final_status in failure_states:
|
@@ -328,7 +334,7 @@ class EmrStartNotebookExecutionOperator(BaseOperator):
|
|
328
334
|
return notebook_execution_id
|
329
335
|
|
330
336
|
|
331
|
-
class EmrStopNotebookExecutionOperator(
|
337
|
+
class EmrStopNotebookExecutionOperator(AwsBaseOperator[EmrHook]):
|
332
338
|
"""
|
333
339
|
An operator that stops a running EMR notebook execution.
|
334
340
|
|
@@ -339,16 +345,20 @@ class EmrStopNotebookExecutionOperator(BaseOperator):
|
|
339
345
|
:param notebook_execution_id: The unique identifier of the notebook execution.
|
340
346
|
:param wait_for_completion: If True, the operator will wait for the notebook.
|
341
347
|
to be in a STOPPED or FINISHED state. Defaults to False.
|
342
|
-
:param aws_conn_id:
|
343
|
-
If this is None or empty then the default boto3 behaviour is used. If
|
348
|
+
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
349
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
344
350
|
running Airflow in a distributed manner and aws_conn_id is None or
|
345
351
|
empty, then default boto3 configuration would be used (and must be
|
346
352
|
maintained on each worker node).
|
353
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
354
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
355
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
347
356
|
:param waiter_max_attempts: Maximum number of tries before failing.
|
348
357
|
:param waiter_delay: Number of seconds between polling the state of the notebook.
|
349
358
|
"""
|
350
359
|
|
351
|
-
|
360
|
+
aws_hook_class = EmrHook
|
361
|
+
template_fields: Sequence[str] = aws_template_fields(
|
352
362
|
"notebook_execution_id",
|
353
363
|
"waiter_delay",
|
354
364
|
"waiter_max_attempts",
|
@@ -358,7 +368,6 @@ class EmrStopNotebookExecutionOperator(BaseOperator):
|
|
358
368
|
self,
|
359
369
|
notebook_execution_id: str,
|
360
370
|
wait_for_completion: bool = False,
|
361
|
-
aws_conn_id: str | None = "aws_default",
|
362
371
|
waiter_max_attempts: int | None = None,
|
363
372
|
waiter_delay: int | None = None,
|
364
373
|
**kwargs: Any,
|
@@ -366,16 +375,14 @@ class EmrStopNotebookExecutionOperator(BaseOperator):
|
|
366
375
|
super().__init__(**kwargs)
|
367
376
|
self.notebook_execution_id = notebook_execution_id
|
368
377
|
self.wait_for_completion = wait_for_completion
|
369
|
-
self.aws_conn_id = aws_conn_id
|
370
378
|
self.waiter_max_attempts = waiter_max_attempts or 25
|
371
379
|
self.waiter_delay = waiter_delay or 60
|
372
380
|
|
373
381
|
def execute(self, context: Context) -> None:
|
374
|
-
|
375
|
-
emr_hook.conn.stop_notebook_execution(NotebookExecutionId=self.notebook_execution_id)
|
382
|
+
self.hook.conn.stop_notebook_execution(NotebookExecutionId=self.notebook_execution_id)
|
376
383
|
|
377
384
|
if self.wait_for_completion:
|
378
|
-
|
385
|
+
self.hook.get_waiter("notebook_stopped").wait(
|
379
386
|
NotebookExecutionId=self.notebook_execution_id,
|
380
387
|
WaiterConfig=prune_dict(
|
381
388
|
{
|
@@ -386,7 +393,7 @@ class EmrStopNotebookExecutionOperator(BaseOperator):
|
|
386
393
|
)
|
387
394
|
|
388
395
|
|
389
|
-
class EmrEksCreateClusterOperator(
|
396
|
+
class EmrEksCreateClusterOperator(AwsBaseOperator[EmrContainerHook]):
|
390
397
|
"""
|
391
398
|
An operator that creates EMR on EKS virtual clusters.
|
392
399
|
|
@@ -399,11 +406,19 @@ class EmrEksCreateClusterOperator(BaseOperator):
|
|
399
406
|
:param eks_namespace: namespace used by the EKS cluster.
|
400
407
|
:param virtual_cluster_id: The EMR on EKS virtual cluster id.
|
401
408
|
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
409
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
410
|
+
running Airflow in a distributed manner and aws_conn_id is None or
|
411
|
+
empty, then default boto3 configuration would be used (and must be
|
412
|
+
maintained on each worker node).
|
413
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
414
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
415
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
402
416
|
:param tags: The tags assigned to created cluster.
|
403
417
|
Defaults to None
|
404
418
|
"""
|
405
419
|
|
406
|
-
|
420
|
+
aws_hook_class = EmrContainerHook
|
421
|
+
template_fields: Sequence[str] = aws_template_fields(
|
407
422
|
"virtual_cluster_name",
|
408
423
|
"eks_cluster_name",
|
409
424
|
"eks_namespace",
|
@@ -417,7 +432,6 @@ class EmrEksCreateClusterOperator(BaseOperator):
|
|
417
432
|
eks_cluster_name: str,
|
418
433
|
eks_namespace: str,
|
419
434
|
virtual_cluster_id: str = "",
|
420
|
-
aws_conn_id: str | None = "aws_default",
|
421
435
|
tags: dict | None = None,
|
422
436
|
**kwargs: Any,
|
423
437
|
) -> None:
|
@@ -426,14 +440,8 @@ class EmrEksCreateClusterOperator(BaseOperator):
|
|
426
440
|
self.eks_cluster_name = eks_cluster_name
|
427
441
|
self.eks_namespace = eks_namespace
|
428
442
|
self.virtual_cluster_id = virtual_cluster_id
|
429
|
-
self.aws_conn_id = aws_conn_id
|
430
443
|
self.tags = tags
|
431
444
|
|
432
|
-
@cached_property
|
433
|
-
def hook(self) -> EmrContainerHook:
|
434
|
-
"""Create and return an EmrContainerHook."""
|
435
|
-
return EmrContainerHook(self.aws_conn_id)
|
436
|
-
|
437
445
|
def execute(self, context: Context) -> str | None:
|
438
446
|
"""Create EMR on EKS virtual Cluster."""
|
439
447
|
self.virtual_cluster_id = self.hook.create_emr_on_eks_cluster(
|
@@ -442,7 +450,7 @@ class EmrEksCreateClusterOperator(BaseOperator):
|
|
442
450
|
return self.virtual_cluster_id
|
443
451
|
|
444
452
|
|
445
|
-
class EmrContainerOperator(
|
453
|
+
class EmrContainerOperator(AwsBaseOperator[EmrContainerHook]):
|
446
454
|
"""
|
447
455
|
An operator that submits jobs to EMR on EKS virtual clusters.
|
448
456
|
|
@@ -461,6 +469,13 @@ class EmrContainerOperator(BaseOperator):
|
|
461
469
|
Use this if you want to specify a unique ID to prevent two jobs from getting started.
|
462
470
|
If no token is provided, a UUIDv4 token will be generated for you.
|
463
471
|
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
472
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
473
|
+
running Airflow in a distributed manner and aws_conn_id is None or
|
474
|
+
empty, then default boto3 configuration would be used (and must be
|
475
|
+
maintained on each worker node).
|
476
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
477
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
478
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
464
479
|
:param wait_for_completion: Whether or not to wait in the operator for the job to complete.
|
465
480
|
:param poll_interval: Time (in seconds) to wait between two consecutive calls to check query status on EMR
|
466
481
|
:param max_polling_attempts: Maximum number of times to wait for the job run to finish.
|
@@ -472,7 +487,8 @@ class EmrContainerOperator(BaseOperator):
|
|
472
487
|
:param deferrable: Run operator in the deferrable mode.
|
473
488
|
"""
|
474
489
|
|
475
|
-
|
490
|
+
aws_hook_class = EmrContainerHook
|
491
|
+
template_fields: Sequence[str] = aws_template_fields(
|
476
492
|
"name",
|
477
493
|
"virtual_cluster_id",
|
478
494
|
"execution_role_arn",
|
@@ -492,7 +508,6 @@ class EmrContainerOperator(BaseOperator):
|
|
492
508
|
job_driver: dict,
|
493
509
|
configuration_overrides: dict | None = None,
|
494
510
|
client_request_token: str | None = None,
|
495
|
-
aws_conn_id: str | None = "aws_default",
|
496
511
|
wait_for_completion: bool = True,
|
497
512
|
poll_interval: int = 30,
|
498
513
|
tags: dict | None = None,
|
@@ -508,7 +523,6 @@ class EmrContainerOperator(BaseOperator):
|
|
508
523
|
self.release_label = release_label
|
509
524
|
self.job_driver = job_driver
|
510
525
|
self.configuration_overrides = configuration_overrides or {}
|
511
|
-
self.aws_conn_id = aws_conn_id
|
512
526
|
self.client_request_token = client_request_token or str(uuid4())
|
513
527
|
self.wait_for_completion = wait_for_completion
|
514
528
|
self.poll_interval = poll_interval
|
@@ -518,13 +532,9 @@ class EmrContainerOperator(BaseOperator):
|
|
518
532
|
self.job_id: str | None = None
|
519
533
|
self.deferrable = deferrable
|
520
534
|
|
521
|
-
@
|
522
|
-
def
|
523
|
-
|
524
|
-
return EmrContainerHook(
|
525
|
-
self.aws_conn_id,
|
526
|
-
virtual_cluster_id=self.virtual_cluster_id,
|
527
|
-
)
|
535
|
+
@property
|
536
|
+
def _hook_parameters(self):
|
537
|
+
return {**super()._hook_parameters, "virtual_cluster_id": self.virtual_cluster_id}
|
528
538
|
|
529
539
|
def execute(self, context: Context) -> str | None:
|
530
540
|
"""Run job on EMR Containers."""
|
@@ -619,7 +629,7 @@ class EmrContainerOperator(BaseOperator):
|
|
619
629
|
self.hook.poll_query_status(self.job_id)
|
620
630
|
|
621
631
|
|
622
|
-
class EmrCreateJobFlowOperator(
|
632
|
+
class EmrCreateJobFlowOperator(AwsBaseOperator[EmrHook]):
|
623
633
|
"""
|
624
634
|
Creates an EMR JobFlow, reading the config from the EMR connection.
|
625
635
|
|
@@ -629,11 +639,6 @@ class EmrCreateJobFlowOperator(BaseOperator):
|
|
629
639
|
For more information on how to use this operator, take a look at the guide:
|
630
640
|
:ref:`howto/operator:EmrCreateJobFlowOperator`
|
631
641
|
|
632
|
-
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
633
|
-
If this is None or empty then the default boto3 behaviour is used. If
|
634
|
-
running Airflow in a distributed manner and aws_conn_id is None or
|
635
|
-
empty, then default boto3 configuration would be used (and must be
|
636
|
-
maintained on each worker node)
|
637
642
|
:param emr_conn_id: :ref:`Amazon Elastic MapReduce Connection <howto/connection:emr>`.
|
638
643
|
Use to receive an initial Amazon EMR cluster configuration:
|
639
644
|
``boto3.client('emr').run_job_flow`` request body.
|
@@ -641,7 +646,14 @@ class EmrCreateJobFlowOperator(BaseOperator):
|
|
641
646
|
then an empty initial configuration is used.
|
642
647
|
:param job_flow_overrides: boto3 style arguments or reference to an arguments file
|
643
648
|
(must be '.json') to override specific ``emr_conn_id`` extra parameters. (templated)
|
644
|
-
:param
|
649
|
+
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
650
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
651
|
+
running Airflow in a distributed manner and aws_conn_id is None or
|
652
|
+
empty, then default boto3 configuration would be used (and must be
|
653
|
+
maintained on each worker node).
|
654
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
655
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
656
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
645
657
|
:param wait_for_completion: Deprecated - use `wait_policy` instead.
|
646
658
|
Whether to finish task immediately after creation (False) or wait for jobflow
|
647
659
|
completion (True)
|
@@ -657,7 +669,8 @@ class EmrCreateJobFlowOperator(BaseOperator):
|
|
657
669
|
(default: False)
|
658
670
|
"""
|
659
671
|
|
660
|
-
|
672
|
+
aws_hook_class = EmrHook
|
673
|
+
template_fields: Sequence[str] = aws_template_fields(
|
661
674
|
"job_flow_overrides",
|
662
675
|
"waiter_delay",
|
663
676
|
"waiter_max_attempts",
|
@@ -673,10 +686,8 @@ class EmrCreateJobFlowOperator(BaseOperator):
|
|
673
686
|
def __init__(
|
674
687
|
self,
|
675
688
|
*,
|
676
|
-
aws_conn_id: str | None = "aws_default",
|
677
689
|
emr_conn_id: str | None = "emr_default",
|
678
690
|
job_flow_overrides: str | dict[str, Any] | None = None,
|
679
|
-
region_name: str | None = None,
|
680
691
|
wait_for_completion: bool | None = None,
|
681
692
|
wait_policy: WaitPolicy | None = None,
|
682
693
|
waiter_max_attempts: int | None = None,
|
@@ -685,10 +696,8 @@ class EmrCreateJobFlowOperator(BaseOperator):
|
|
685
696
|
**kwargs: Any,
|
686
697
|
):
|
687
698
|
super().__init__(**kwargs)
|
688
|
-
self.aws_conn_id = aws_conn_id
|
689
699
|
self.emr_conn_id = emr_conn_id
|
690
700
|
self.job_flow_overrides = job_flow_overrides or {}
|
691
|
-
self.region_name = region_name
|
692
701
|
self.wait_policy = wait_policy
|
693
702
|
self.waiter_max_attempts = waiter_max_attempts or 60
|
694
703
|
self.waiter_delay = waiter_delay or 60
|
@@ -703,12 +712,9 @@ class EmrCreateJobFlowOperator(BaseOperator):
|
|
703
712
|
# preserve previous behaviour
|
704
713
|
self.wait_policy = WaitPolicy.WAIT_FOR_COMPLETION if wait_for_completion else None
|
705
714
|
|
706
|
-
@
|
707
|
-
def
|
708
|
-
|
709
|
-
return EmrHook(
|
710
|
-
aws_conn_id=self.aws_conn_id, emr_conn_id=self.emr_conn_id, region_name=self.region_name
|
711
|
-
)
|
715
|
+
@property
|
716
|
+
def _hook_parameters(self):
|
717
|
+
return {**super()._hook_parameters, "emr_conn_id": self.emr_conn_id}
|
712
718
|
|
713
719
|
def execute(self, context: Context) -> str | None:
|
714
720
|
self.log.info(
|
@@ -719,7 +725,7 @@ class EmrCreateJobFlowOperator(BaseOperator):
|
|
719
725
|
self.job_flow_overrides = job_flow_overrides
|
720
726
|
else:
|
721
727
|
job_flow_overrides = self.job_flow_overrides
|
722
|
-
response = self.
|
728
|
+
response = self.hook.create_job_flow(job_flow_overrides)
|
723
729
|
|
724
730
|
if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
|
725
731
|
raise AirflowException(f"Job flow creation failed: {response}")
|
@@ -729,18 +735,18 @@ class EmrCreateJobFlowOperator(BaseOperator):
|
|
729
735
|
EmrClusterLink.persist(
|
730
736
|
context=context,
|
731
737
|
operator=self,
|
732
|
-
region_name=self.
|
733
|
-
aws_partition=self.
|
738
|
+
region_name=self.hook.conn_region_name,
|
739
|
+
aws_partition=self.hook.conn_partition,
|
734
740
|
job_flow_id=self._job_flow_id,
|
735
741
|
)
|
736
742
|
if self._job_flow_id:
|
737
743
|
EmrLogsLink.persist(
|
738
744
|
context=context,
|
739
745
|
operator=self,
|
740
|
-
region_name=self.
|
741
|
-
aws_partition=self.
|
746
|
+
region_name=self.hook.conn_region_name,
|
747
|
+
aws_partition=self.hook.conn_partition,
|
742
748
|
job_flow_id=self._job_flow_id,
|
743
|
-
log_uri=get_log_uri(emr_client=self.
|
749
|
+
log_uri=get_log_uri(emr_client=self.hook.conn, job_flow_id=self._job_flow_id),
|
744
750
|
)
|
745
751
|
if self.deferrable:
|
746
752
|
self.defer(
|
@@ -757,7 +763,7 @@ class EmrCreateJobFlowOperator(BaseOperator):
|
|
757
763
|
)
|
758
764
|
if self.wait_policy:
|
759
765
|
waiter_name = WAITER_POLICY_NAME_MAPPING[self.wait_policy]
|
760
|
-
self.
|
766
|
+
self.hook.get_waiter(waiter_name).wait(
|
761
767
|
ClusterId=self._job_flow_id,
|
762
768
|
WaiterConfig=prune_dict(
|
763
769
|
{
|
@@ -781,10 +787,10 @@ class EmrCreateJobFlowOperator(BaseOperator):
|
|
781
787
|
"""Terminate the EMR cluster (job flow) unless TerminationProtected is enabled on the cluster."""
|
782
788
|
if self._job_flow_id:
|
783
789
|
self.log.info("Terminating job flow %s", self._job_flow_id)
|
784
|
-
self.
|
790
|
+
self.hook.conn.terminate_job_flows(JobFlowIds=[self._job_flow_id])
|
785
791
|
|
786
792
|
|
787
|
-
class EmrModifyClusterOperator(
|
793
|
+
class EmrModifyClusterOperator(AwsBaseOperator[EmrHook]):
|
788
794
|
"""
|
789
795
|
An operator that modifies an existing EMR cluster.
|
790
796
|
|
@@ -794,12 +800,19 @@ class EmrModifyClusterOperator(BaseOperator):
|
|
794
800
|
|
795
801
|
:param cluster_id: cluster identifier
|
796
802
|
:param step_concurrency_level: Concurrency of the cluster
|
797
|
-
:param aws_conn_id:
|
798
|
-
|
803
|
+
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
804
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
805
|
+
running Airflow in a distributed manner and aws_conn_id is None or
|
806
|
+
empty, then default boto3 configuration would be used (and must be
|
807
|
+
maintained on each worker node).
|
808
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
809
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
810
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
799
811
|
:param do_xcom_push: if True, cluster_id is pushed to XCom with key cluster_id.
|
800
812
|
"""
|
801
813
|
|
802
|
-
|
814
|
+
aws_hook_class = EmrHook
|
815
|
+
template_fields: Sequence[str] = aws_template_fields("cluster_id", "step_concurrency_level")
|
803
816
|
template_ext: Sequence[str] = ()
|
804
817
|
ui_color = "#f9c915"
|
805
818
|
operator_extra_links = (
|
@@ -812,39 +825,34 @@ class EmrModifyClusterOperator(BaseOperator):
|
|
812
825
|
*,
|
813
826
|
cluster_id: str,
|
814
827
|
step_concurrency_level: int,
|
815
|
-
aws_conn_id: str | None = "aws_default",
|
816
828
|
**kwargs,
|
817
829
|
):
|
818
830
|
super().__init__(**kwargs)
|
819
|
-
self.aws_conn_id = aws_conn_id
|
820
831
|
self.cluster_id = cluster_id
|
821
832
|
self.step_concurrency_level = step_concurrency_level
|
822
833
|
|
823
834
|
def execute(self, context: Context) -> int:
|
824
|
-
emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
|
825
|
-
emr = emr_hook.get_conn()
|
826
|
-
|
827
835
|
if self.do_xcom_push:
|
828
836
|
context["ti"].xcom_push(key="cluster_id", value=self.cluster_id)
|
829
837
|
|
830
838
|
EmrClusterLink.persist(
|
831
839
|
context=context,
|
832
840
|
operator=self,
|
833
|
-
region_name=
|
834
|
-
aws_partition=
|
841
|
+
region_name=self.hook.conn_region_name,
|
842
|
+
aws_partition=self.hook.conn_partition,
|
835
843
|
job_flow_id=self.cluster_id,
|
836
844
|
)
|
837
845
|
EmrLogsLink.persist(
|
838
846
|
context=context,
|
839
847
|
operator=self,
|
840
|
-
region_name=
|
841
|
-
aws_partition=
|
848
|
+
region_name=self.hook.conn_region_name,
|
849
|
+
aws_partition=self.hook.conn_partition,
|
842
850
|
job_flow_id=self.cluster_id,
|
843
|
-
log_uri=get_log_uri(emr_client=
|
851
|
+
log_uri=get_log_uri(emr_client=self.hook.conn, job_flow_id=self.cluster_id),
|
844
852
|
)
|
845
853
|
|
846
854
|
self.log.info("Modifying cluster %s", self.cluster_id)
|
847
|
-
response =
|
855
|
+
response = self.hook.conn.modify_cluster(
|
848
856
|
ClusterId=self.cluster_id, StepConcurrencyLevel=self.step_concurrency_level
|
849
857
|
)
|
850
858
|
|
@@ -854,7 +862,7 @@ class EmrModifyClusterOperator(BaseOperator):
|
|
854
862
|
return response["StepConcurrencyLevel"]
|
855
863
|
|
856
864
|
|
857
|
-
class EmrTerminateJobFlowOperator(
|
865
|
+
class EmrTerminateJobFlowOperator(AwsBaseOperator[EmrHook]):
|
858
866
|
"""
|
859
867
|
Operator to terminate EMR JobFlows.
|
860
868
|
|
@@ -864,10 +872,13 @@ class EmrTerminateJobFlowOperator(BaseOperator):
|
|
864
872
|
|
865
873
|
:param job_flow_id: id of the JobFlow to terminate. (templated)
|
866
874
|
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
867
|
-
If this is None or empty then the default boto3 behaviour is used. If
|
875
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
868
876
|
running Airflow in a distributed manner and aws_conn_id is None or
|
869
877
|
empty, then default boto3 configuration would be used (and must be
|
870
878
|
maintained on each worker node).
|
879
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
880
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
881
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
871
882
|
:param waiter_delay: Time (in seconds) to wait between two consecutive calls to check JobFlow status
|
872
883
|
:param waiter_max_attempts: The maximum number of times to poll for JobFlow status.
|
873
884
|
:param deferrable: If True, the operator will wait asynchronously for the crawl to complete.
|
@@ -875,7 +886,10 @@ class EmrTerminateJobFlowOperator(BaseOperator):
|
|
875
886
|
(default: False)
|
876
887
|
"""
|
877
888
|
|
878
|
-
|
889
|
+
aws_hook_class = EmrHook
|
890
|
+
template_fields: Sequence[str] = aws_template_fields(
|
891
|
+
"job_flow_id",
|
892
|
+
)
|
879
893
|
template_ext: Sequence[str] = ()
|
880
894
|
ui_color = "#f9c915"
|
881
895
|
operator_extra_links = (
|
@@ -887,7 +901,6 @@ class EmrTerminateJobFlowOperator(BaseOperator):
|
|
887
901
|
self,
|
888
902
|
*,
|
889
903
|
job_flow_id: str,
|
890
|
-
aws_conn_id: str | None = "aws_default",
|
891
904
|
waiter_delay: int = 60,
|
892
905
|
waiter_max_attempts: int = 20,
|
893
906
|
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
@@ -895,33 +908,29 @@ class EmrTerminateJobFlowOperator(BaseOperator):
|
|
895
908
|
):
|
896
909
|
super().__init__(**kwargs)
|
897
910
|
self.job_flow_id = job_flow_id
|
898
|
-
self.aws_conn_id = aws_conn_id
|
899
911
|
self.waiter_delay = waiter_delay
|
900
912
|
self.waiter_max_attempts = waiter_max_attempts
|
901
913
|
self.deferrable = deferrable
|
902
914
|
|
903
915
|
def execute(self, context: Context) -> None:
|
904
|
-
emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
|
905
|
-
emr = emr_hook.get_conn()
|
906
|
-
|
907
916
|
EmrClusterLink.persist(
|
908
917
|
context=context,
|
909
918
|
operator=self,
|
910
|
-
region_name=
|
911
|
-
aws_partition=
|
919
|
+
region_name=self.hook.conn_region_name,
|
920
|
+
aws_partition=self.hook.conn_partition,
|
912
921
|
job_flow_id=self.job_flow_id,
|
913
922
|
)
|
914
923
|
EmrLogsLink.persist(
|
915
924
|
context=context,
|
916
925
|
operator=self,
|
917
|
-
region_name=
|
918
|
-
aws_partition=
|
926
|
+
region_name=self.hook.conn_region_name,
|
927
|
+
aws_partition=self.hook.conn_partition,
|
919
928
|
job_flow_id=self.job_flow_id,
|
920
|
-
log_uri=get_log_uri(emr_client=
|
929
|
+
log_uri=get_log_uri(emr_client=self.hook.conn, job_flow_id=self.job_flow_id),
|
921
930
|
)
|
922
931
|
|
923
932
|
self.log.info("Terminating JobFlow %s", self.job_flow_id)
|
924
|
-
response =
|
933
|
+
response = self.hook.conn.terminate_job_flows(JobFlowIds=[self.job_flow_id])
|
925
934
|
|
926
935
|
if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
|
927
936
|
raise AirflowException(f"JobFlow termination failed: {response}")
|
@@ -951,7 +960,7 @@ class EmrTerminateJobFlowOperator(BaseOperator):
|
|
951
960
|
self.log.info("Jobflow terminated successfully.")
|
952
961
|
|
953
962
|
|
954
|
-
class EmrServerlessCreateApplicationOperator(
|
963
|
+
class EmrServerlessCreateApplicationOperator(AwsBaseOperator[EmrServerlessHook]):
|
955
964
|
"""
|
956
965
|
Operator to create Serverless EMR Application.
|
957
966
|
|
@@ -968,10 +977,13 @@ class EmrServerlessCreateApplicationOperator(BaseOperator):
|
|
968
977
|
Its value must be unique for each request.
|
969
978
|
:param config: Optional dictionary for arbitrary parameters to the boto API create_application call.
|
970
979
|
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
971
|
-
If this is None or empty then the default boto3 behaviour is used. If
|
980
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
972
981
|
running Airflow in a distributed manner and aws_conn_id is None or
|
973
982
|
empty, then default boto3 configuration would be used (and must be
|
974
983
|
maintained on each worker node).
|
984
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
985
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
986
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
975
987
|
:param waiter_max_attempts: Number of times the waiter should poll the application to check the state.
|
976
988
|
If not set, the waiter will use its default value.
|
977
989
|
:param waiter_delay: Number of seconds between polling the state of the application.
|
@@ -980,6 +992,8 @@ class EmrServerlessCreateApplicationOperator(BaseOperator):
|
|
980
992
|
(default: False, but can be overridden in config file by setting default_deferrable to True)
|
981
993
|
"""
|
982
994
|
|
995
|
+
aws_hook_class = EmrServerlessHook
|
996
|
+
|
983
997
|
def __init__(
|
984
998
|
self,
|
985
999
|
release_label: str,
|
@@ -987,7 +1001,6 @@ class EmrServerlessCreateApplicationOperator(BaseOperator):
|
|
987
1001
|
client_request_token: str = "",
|
988
1002
|
config: dict | None = None,
|
989
1003
|
wait_for_completion: bool = True,
|
990
|
-
aws_conn_id: str | None = "aws_default",
|
991
1004
|
waiter_max_attempts: int | ArgNotSet = NOTSET,
|
992
1005
|
waiter_delay: int | ArgNotSet = NOTSET,
|
993
1006
|
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
@@ -996,7 +1009,6 @@ class EmrServerlessCreateApplicationOperator(BaseOperator):
|
|
996
1009
|
waiter_delay = 60 if waiter_delay is NOTSET else waiter_delay
|
997
1010
|
waiter_max_attempts = 25 if waiter_max_attempts is NOTSET else waiter_max_attempts
|
998
1011
|
|
999
|
-
self.aws_conn_id = aws_conn_id
|
1000
1012
|
self.release_label = release_label
|
1001
1013
|
self.job_type = job_type
|
1002
1014
|
self.wait_for_completion = wait_for_completion
|
@@ -1009,11 +1021,6 @@ class EmrServerlessCreateApplicationOperator(BaseOperator):
|
|
1009
1021
|
|
1010
1022
|
self.client_request_token = client_request_token or str(uuid4())
|
1011
1023
|
|
1012
|
-
@cached_property
|
1013
|
-
def hook(self) -> EmrServerlessHook:
|
1014
|
-
"""Create and return an EmrServerlessHook."""
|
1015
|
-
return EmrServerlessHook(aws_conn_id=self.aws_conn_id)
|
1016
|
-
|
1017
1024
|
def execute(self, context: Context) -> str | None:
|
1018
1025
|
response = self.hook.conn.create_application(
|
1019
1026
|
clientToken=self.client_request_token,
|
@@ -1094,7 +1101,7 @@ class EmrServerlessCreateApplicationOperator(BaseOperator):
|
|
1094
1101
|
return validated_event["application_id"]
|
1095
1102
|
|
1096
1103
|
|
1097
|
-
class EmrServerlessStartJobOperator(
|
1104
|
+
class EmrServerlessStartJobOperator(AwsBaseOperator[EmrServerlessHook]):
|
1098
1105
|
"""
|
1099
1106
|
Operator to start EMR Serverless job.
|
1100
1107
|
|
@@ -1113,10 +1120,13 @@ class EmrServerlessStartJobOperator(BaseOperator):
|
|
1113
1120
|
If set to False, ``waiter_countdown`` and ``waiter_check_interval_seconds`` will only be applied
|
1114
1121
|
when waiting for the application to be in the ``STARTED`` state.
|
1115
1122
|
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
1116
|
-
If this is None or empty then the default boto3 behaviour is used. If
|
1123
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
1117
1124
|
running Airflow in a distributed manner and aws_conn_id is None or
|
1118
1125
|
empty, then default boto3 configuration would be used (and must be
|
1119
1126
|
maintained on each worker node).
|
1127
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
1128
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
1129
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
1120
1130
|
:param name: Name for the EMR Serverless job. If not provided, a default name will be assigned.
|
1121
1131
|
:param waiter_max_attempts: Number of times the waiter should poll the application to check the state.
|
1122
1132
|
If not set, the waiter will use its default value.
|
@@ -1129,14 +1139,14 @@ class EmrServerlessStartJobOperator(BaseOperator):
|
|
1129
1139
|
Tez UI or Spark stdout logs. Defaults to False.
|
1130
1140
|
"""
|
1131
1141
|
|
1132
|
-
|
1142
|
+
aws_hook_class = EmrServerlessHook
|
1143
|
+
template_fields: Sequence[str] = aws_template_fields(
|
1133
1144
|
"application_id",
|
1134
1145
|
"config",
|
1135
1146
|
"execution_role_arn",
|
1136
1147
|
"job_driver",
|
1137
1148
|
"configuration_overrides",
|
1138
1149
|
"name",
|
1139
|
-
"aws_conn_id",
|
1140
1150
|
)
|
1141
1151
|
|
1142
1152
|
template_fields_renderers = {
|
@@ -1160,7 +1170,6 @@ class EmrServerlessStartJobOperator(BaseOperator):
|
|
1160
1170
|
client_request_token: str = "",
|
1161
1171
|
config: dict | None = None,
|
1162
1172
|
wait_for_completion: bool = True,
|
1163
|
-
aws_conn_id: str | None = "aws_default",
|
1164
1173
|
name: str | None = None,
|
1165
1174
|
waiter_max_attempts: int | ArgNotSet = NOTSET,
|
1166
1175
|
waiter_delay: int | ArgNotSet = NOTSET,
|
@@ -1171,7 +1180,6 @@ class EmrServerlessStartJobOperator(BaseOperator):
|
|
1171
1180
|
waiter_delay = 60 if waiter_delay is NOTSET else waiter_delay
|
1172
1181
|
waiter_max_attempts = 25 if waiter_max_attempts is NOTSET else waiter_max_attempts
|
1173
1182
|
|
1174
|
-
self.aws_conn_id = aws_conn_id
|
1175
1183
|
self.application_id = application_id
|
1176
1184
|
self.execution_role_arn = execution_role_arn
|
1177
1185
|
self.job_driver = job_driver
|
@@ -1188,11 +1196,6 @@ class EmrServerlessStartJobOperator(BaseOperator):
|
|
1188
1196
|
|
1189
1197
|
self.client_request_token = client_request_token or str(uuid4())
|
1190
1198
|
|
1191
|
-
@cached_property
|
1192
|
-
def hook(self) -> EmrServerlessHook:
|
1193
|
-
"""Create and return an EmrServerlessHook."""
|
1194
|
-
return EmrServerlessHook(aws_conn_id=self.aws_conn_id)
|
1195
|
-
|
1196
1199
|
def execute(self, context: Context, event: dict[str, Any] | None = None) -> str | None:
|
1197
1200
|
app_state = self.hook.conn.get_application(applicationId=self.application_id)["application"]["state"]
|
1198
1201
|
if app_state not in EmrServerlessHook.APPLICATION_SUCCESS_STATES:
|
@@ -1413,7 +1416,7 @@ class EmrServerlessStartJobOperator(BaseOperator):
|
|
1413
1416
|
self.log.info("CloudWatch logs available at: %s", emrs_cloudwatch_url)
|
1414
1417
|
|
1415
1418
|
|
1416
|
-
class EmrServerlessStopApplicationOperator(
|
1419
|
+
class EmrServerlessStopApplicationOperator(AwsBaseOperator[EmrServerlessHook]):
|
1417
1420
|
"""
|
1418
1421
|
Operator to stop an EMR Serverless application.
|
1419
1422
|
|
@@ -1424,10 +1427,13 @@ class EmrServerlessStopApplicationOperator(BaseOperator):
|
|
1424
1427
|
:param application_id: ID of the EMR Serverless application to stop.
|
1425
1428
|
:param wait_for_completion: If true, wait for the Application to stop before returning. Defaults to True.
|
1426
1429
|
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
1427
|
-
If this is None or empty then the default boto3 behaviour is used. If
|
1430
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
1428
1431
|
running Airflow in a distributed manner and aws_conn_id is None or
|
1429
1432
|
empty, then default boto3 configuration would be used (and must be
|
1430
1433
|
maintained on each worker node).
|
1434
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
1435
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
1436
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
1431
1437
|
:param force_stop: If set to True, any job for that app that is not in a terminal state will be cancelled.
|
1432
1438
|
Otherwise, trying to stop an app with running jobs will return an error.
|
1433
1439
|
If you want to wait for the jobs to finish gracefully, use
|
@@ -1441,13 +1447,15 @@ class EmrServerlessStopApplicationOperator(BaseOperator):
|
|
1441
1447
|
(default: False, but can be overridden in config file by setting default_deferrable to True)
|
1442
1448
|
"""
|
1443
1449
|
|
1444
|
-
|
1450
|
+
aws_hook_class = EmrServerlessHook
|
1451
|
+
template_fields: Sequence[str] = aws_template_fields(
|
1452
|
+
"application_id",
|
1453
|
+
)
|
1445
1454
|
|
1446
1455
|
def __init__(
|
1447
1456
|
self,
|
1448
1457
|
application_id: str,
|
1449
1458
|
wait_for_completion: bool = True,
|
1450
|
-
aws_conn_id: str | None = "aws_default",
|
1451
1459
|
waiter_max_attempts: int | ArgNotSet = NOTSET,
|
1452
1460
|
waiter_delay: int | ArgNotSet = NOTSET,
|
1453
1461
|
force_stop: bool = False,
|
@@ -1457,7 +1465,6 @@ class EmrServerlessStopApplicationOperator(BaseOperator):
|
|
1457
1465
|
waiter_delay = 60 if waiter_delay is NOTSET else waiter_delay
|
1458
1466
|
waiter_max_attempts = 25 if waiter_max_attempts is NOTSET else waiter_max_attempts
|
1459
1467
|
|
1460
|
-
self.aws_conn_id = aws_conn_id
|
1461
1468
|
self.application_id = application_id
|
1462
1469
|
self.wait_for_completion = False if deferrable else wait_for_completion
|
1463
1470
|
self.waiter_max_attempts = int(waiter_max_attempts) # type: ignore[arg-type]
|
@@ -1466,11 +1473,6 @@ class EmrServerlessStopApplicationOperator(BaseOperator):
|
|
1466
1473
|
self.deferrable = deferrable
|
1467
1474
|
super().__init__(**kwargs)
|
1468
1475
|
|
1469
|
-
@cached_property
|
1470
|
-
def hook(self) -> EmrServerlessHook:
|
1471
|
-
"""Create and return an EmrServerlessHook."""
|
1472
|
-
return EmrServerlessHook(aws_conn_id=self.aws_conn_id)
|
1473
|
-
|
1474
1476
|
def execute(self, context: Context) -> None:
|
1475
1477
|
self.log.info("Stopping application: %s", self.application_id)
|
1476
1478
|
|
@@ -1564,10 +1566,13 @@ class EmrServerlessDeleteApplicationOperator(EmrServerlessStopApplicationOperato
|
|
1564
1566
|
:param wait_for_completion: If true, wait for the Application to be deleted before returning.
|
1565
1567
|
Defaults to True. Note that this operator will always wait for the application to be STOPPED first.
|
1566
1568
|
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
1567
|
-
If this is None or empty then the default boto3 behaviour is used. If
|
1569
|
+
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
1568
1570
|
running Airflow in a distributed manner and aws_conn_id is None or
|
1569
1571
|
empty, then default boto3 configuration would be used (and must be
|
1570
1572
|
maintained on each worker node).
|
1573
|
+
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
1574
|
+
:param verify: Whether or not to verify SSL certificates. See:
|
1575
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
1571
1576
|
:param waiter_max_attempts: Number of times the waiter should poll the application to check the state.
|
1572
1577
|
Defaults to 25.
|
1573
1578
|
:param waiter_delay: Number of seconds between polling the state of the application.
|
@@ -1581,13 +1586,14 @@ class EmrServerlessDeleteApplicationOperator(EmrServerlessStopApplicationOperato
|
|
1581
1586
|
:class:`airflow.providers.amazon.aws.sensors.emr.EmrServerlessJobSensor`
|
1582
1587
|
"""
|
1583
1588
|
|
1584
|
-
template_fields: Sequence[str] = (
|
1589
|
+
template_fields: Sequence[str] = aws_template_fields(
|
1590
|
+
"application_id",
|
1591
|
+
)
|
1585
1592
|
|
1586
1593
|
def __init__(
|
1587
1594
|
self,
|
1588
1595
|
application_id: str,
|
1589
1596
|
wait_for_completion: bool = True,
|
1590
|
-
aws_conn_id: str | None = "aws_default",
|
1591
1597
|
waiter_max_attempts: int | ArgNotSet = NOTSET,
|
1592
1598
|
waiter_delay: int | ArgNotSet = NOTSET,
|
1593
1599
|
force_stop: bool = False,
|
@@ -1603,7 +1609,6 @@ class EmrServerlessDeleteApplicationOperator(EmrServerlessStopApplicationOperato
|
|
1603
1609
|
application_id=application_id,
|
1604
1610
|
# when deleting an app, we always need to wait for it to stop before we can call delete()
|
1605
1611
|
wait_for_completion=True,
|
1606
|
-
aws_conn_id=aws_conn_id,
|
1607
1612
|
waiter_delay=waiter_delay,
|
1608
1613
|
waiter_max_attempts=waiter_max_attempts,
|
1609
1614
|
force_stop=force_stop,
|