apache-airflow-providers-amazon 8.24.0__py3-none-any.whl → 8.24.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/amazon/LICENSE +4 -4
- airflow/providers/amazon/aws/hooks/base_aws.py +3 -8
- airflow/providers/amazon/aws/hooks/glue.py +0 -123
- airflow/providers/amazon/aws/operators/bedrock.py +20 -6
- airflow/providers/amazon/aws/operators/emr.py +30 -38
- airflow/providers/amazon/aws/operators/glue.py +2 -408
- airflow/providers/amazon/aws/operators/sagemaker.py +12 -85
- airflow/providers/amazon/aws/sensors/glue.py +2 -260
- airflow/providers/amazon/aws/sensors/s3.py +5 -35
- airflow/providers/amazon/aws/transfers/dynamodb_to_s3.py +1 -0
- airflow/providers/amazon/aws/triggers/glue.py +2 -76
- airflow/providers/amazon/aws/waiters/glue.json +0 -98
- airflow/providers/amazon/get_provider_info.py +12 -18
- {apache_airflow_providers_amazon-8.24.0.dist-info → apache_airflow_providers_amazon-8.24.0rc1.dist-info}/METADATA +17 -18
- {apache_airflow_providers_amazon-8.24.0.dist-info → apache_airflow_providers_amazon-8.24.0rc1.dist-info}/RECORD +17 -17
- {apache_airflow_providers_amazon-8.24.0.dist-info → apache_airflow_providers_amazon-8.24.0rc1.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_amazon-8.24.0.dist-info → apache_airflow_providers_amazon-8.24.0rc1.dist-info}/entry_points.txt +0 -0
@@ -18,18 +18,10 @@
|
|
18
18
|
from __future__ import annotations
|
19
19
|
|
20
20
|
from functools import cached_property
|
21
|
-
from typing import TYPE_CHECKING,
|
21
|
+
from typing import TYPE_CHECKING, Sequence
|
22
22
|
|
23
|
-
from airflow.configuration import conf
|
24
23
|
from airflow.exceptions import AirflowException, AirflowSkipException
|
25
|
-
from airflow.providers.amazon.aws.hooks.glue import
|
26
|
-
from airflow.providers.amazon.aws.sensors.base_aws import AwsBaseSensor
|
27
|
-
from airflow.providers.amazon.aws.triggers.glue import (
|
28
|
-
GlueDataQualityRuleRecommendationRunCompleteTrigger,
|
29
|
-
GlueDataQualityRuleSetEvaluationRunCompleteTrigger,
|
30
|
-
)
|
31
|
-
from airflow.providers.amazon.aws.utils import validate_execute_complete_event
|
32
|
-
from airflow.providers.amazon.aws.utils.mixins import aws_template_fields
|
24
|
+
from airflow.providers.amazon.aws.hooks.glue import GlueJobHook
|
33
25
|
from airflow.sensors.base import BaseSensorOperator
|
34
26
|
|
35
27
|
if TYPE_CHECKING:
|
@@ -99,253 +91,3 @@ class GlueJobSensor(BaseSensorOperator):
|
|
99
91
|
run_id=self.run_id,
|
100
92
|
continuation_tokens=self.next_log_tokens,
|
101
93
|
)
|
102
|
-
|
103
|
-
|
104
|
-
class GlueDataQualityRuleSetEvaluationRunSensor(AwsBaseSensor[GlueDataQualityHook]):
|
105
|
-
"""
|
106
|
-
Waits for an AWS Glue data quality ruleset evaluation run to reach any of the status below.
|
107
|
-
|
108
|
-
'FAILED', 'STOPPED', 'STOPPING', 'TIMEOUT', 'SUCCEEDED'
|
109
|
-
|
110
|
-
.. seealso::
|
111
|
-
For more information on how to use this sensor, take a look at the guide:
|
112
|
-
:ref:`howto/sensor:GlueDataQualityRuleSetEvaluationRunSensor`
|
113
|
-
|
114
|
-
:param evaluation_run_id: The AWS Glue data quality ruleset evaluation run identifier.
|
115
|
-
:param verify_result_status: Validate all the ruleset rules evaluation run results,
|
116
|
-
If any of the rule status is Fail or Error then an exception is thrown. (default: True)
|
117
|
-
:param show_results: Displays all the ruleset rules evaluation run results. (default: True)
|
118
|
-
:param deferrable: If True, the sensor will operate in deferrable mode. This mode requires aiobotocore
|
119
|
-
module to be installed.
|
120
|
-
(default: False, but can be overridden in config file by setting default_deferrable to True)
|
121
|
-
:param poke_interval: Polling period in seconds to check for the status of the job. (default: 120)
|
122
|
-
:param max_retries: Number of times before returning the current state. (default: 60)
|
123
|
-
|
124
|
-
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
125
|
-
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
126
|
-
running Airflow in a distributed manner and aws_conn_id is None or
|
127
|
-
empty, then default boto3 configuration would be used (and must be
|
128
|
-
maintained on each worker node).
|
129
|
-
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
130
|
-
:param verify: Whether to verify SSL certificates. See:
|
131
|
-
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
132
|
-
:param botocore_config: Configuration dictionary (key-values) for botocore client. See:
|
133
|
-
https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
|
134
|
-
"""
|
135
|
-
|
136
|
-
SUCCESS_STATES = ("SUCCEEDED",)
|
137
|
-
|
138
|
-
FAILURE_STATES = ("FAILED", "STOPPED", "STOPPING", "TIMEOUT")
|
139
|
-
|
140
|
-
aws_hook_class = GlueDataQualityHook
|
141
|
-
template_fields: Sequence[str] = aws_template_fields("evaluation_run_id")
|
142
|
-
|
143
|
-
def __init__(
|
144
|
-
self,
|
145
|
-
*,
|
146
|
-
evaluation_run_id: str,
|
147
|
-
show_results: bool = True,
|
148
|
-
verify_result_status: bool = True,
|
149
|
-
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
150
|
-
poke_interval: int = 120,
|
151
|
-
max_retries: int = 60,
|
152
|
-
aws_conn_id: str | None = "aws_default",
|
153
|
-
**kwargs,
|
154
|
-
):
|
155
|
-
super().__init__(**kwargs)
|
156
|
-
self.evaluation_run_id = evaluation_run_id
|
157
|
-
self.show_results = show_results
|
158
|
-
self.verify_result_status = verify_result_status
|
159
|
-
self.aws_conn_id = aws_conn_id
|
160
|
-
self.max_retries = max_retries
|
161
|
-
self.poke_interval = poke_interval
|
162
|
-
self.deferrable = deferrable
|
163
|
-
|
164
|
-
def execute(self, context: Context) -> Any:
|
165
|
-
if self.deferrable:
|
166
|
-
self.defer(
|
167
|
-
trigger=GlueDataQualityRuleSetEvaluationRunCompleteTrigger(
|
168
|
-
evaluation_run_id=self.evaluation_run_id,
|
169
|
-
waiter_delay=int(self.poke_interval),
|
170
|
-
waiter_max_attempts=self.max_retries,
|
171
|
-
aws_conn_id=self.aws_conn_id,
|
172
|
-
),
|
173
|
-
method_name="execute_complete",
|
174
|
-
)
|
175
|
-
else:
|
176
|
-
super().execute(context=context)
|
177
|
-
|
178
|
-
def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
|
179
|
-
event = validate_execute_complete_event(event)
|
180
|
-
|
181
|
-
if event["status"] != "success":
|
182
|
-
message = f"Error: AWS Glue data quality ruleset evaluation run: {event}"
|
183
|
-
if self.soft_fail:
|
184
|
-
raise AirflowSkipException(message)
|
185
|
-
raise AirflowException(message)
|
186
|
-
|
187
|
-
self.hook.validate_evaluation_run_results(
|
188
|
-
evaluation_run_id=event["evaluation_run_id"],
|
189
|
-
show_results=self.show_results,
|
190
|
-
verify_result_status=self.verify_result_status,
|
191
|
-
)
|
192
|
-
|
193
|
-
self.log.info("AWS Glue data quality ruleset evaluation run completed.")
|
194
|
-
|
195
|
-
def poke(self, context: Context):
|
196
|
-
self.log.info(
|
197
|
-
"Poking for AWS Glue data quality ruleset evaluation run RunId: %s", self.evaluation_run_id
|
198
|
-
)
|
199
|
-
|
200
|
-
response = self.hook.conn.get_data_quality_ruleset_evaluation_run(RunId=self.evaluation_run_id)
|
201
|
-
|
202
|
-
status = response.get("Status")
|
203
|
-
|
204
|
-
if status in self.SUCCESS_STATES:
|
205
|
-
self.hook.validate_evaluation_run_results(
|
206
|
-
evaluation_run_id=self.evaluation_run_id,
|
207
|
-
show_results=self.show_results,
|
208
|
-
verify_result_status=self.verify_result_status,
|
209
|
-
)
|
210
|
-
|
211
|
-
self.log.info(
|
212
|
-
"AWS Glue data quality ruleset evaluation run completed RunId: %s Run State: %s",
|
213
|
-
self.evaluation_run_id,
|
214
|
-
response["Status"],
|
215
|
-
)
|
216
|
-
|
217
|
-
return True
|
218
|
-
|
219
|
-
elif status in self.FAILURE_STATES:
|
220
|
-
job_error_message = (
|
221
|
-
f"Error: AWS Glue data quality ruleset evaluation run RunId: {self.evaluation_run_id} Run "
|
222
|
-
f"Status: {status}"
|
223
|
-
f": {response.get('ErrorString')}"
|
224
|
-
)
|
225
|
-
self.log.info(job_error_message)
|
226
|
-
# TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
|
227
|
-
if self.soft_fail:
|
228
|
-
raise AirflowSkipException(job_error_message)
|
229
|
-
raise AirflowException(job_error_message)
|
230
|
-
else:
|
231
|
-
return False
|
232
|
-
|
233
|
-
|
234
|
-
class GlueDataQualityRuleRecommendationRunSensor(AwsBaseSensor[GlueDataQualityHook]):
|
235
|
-
"""
|
236
|
-
Waits for an AWS Glue data quality recommendation run to reach any of the status below.
|
237
|
-
|
238
|
-
'FAILED', 'STOPPED', 'STOPPING', 'TIMEOUT', 'SUCCEEDED'
|
239
|
-
|
240
|
-
.. seealso::
|
241
|
-
For more information on how to use this sensor, take a look at the guide:
|
242
|
-
:ref:`howto/sensor:GlueDataQualityRuleRecommendationRunSensor`
|
243
|
-
|
244
|
-
:param recommendation_run_id: The AWS Glue data quality rule recommendation run identifier.
|
245
|
-
:param show_results: Displays the recommended ruleset (a set of rules), when recommendation run completes. (default: True)
|
246
|
-
:param deferrable: If True, the sensor will operate in deferrable mode. This mode requires aiobotocore
|
247
|
-
module to be installed.
|
248
|
-
(default: False, but can be overridden in config file by setting default_deferrable to True)
|
249
|
-
:param poke_interval: Polling period in seconds to check for the status of the job. (default: 120)
|
250
|
-
:param max_retries: Number of times before returning the current state. (default: 60)
|
251
|
-
|
252
|
-
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
253
|
-
If this is ``None`` or empty then the default boto3 behaviour is used. If
|
254
|
-
running Airflow in a distributed manner and aws_conn_id is None or
|
255
|
-
empty, then default boto3 configuration would be used (and must be
|
256
|
-
maintained on each worker node).
|
257
|
-
:param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
|
258
|
-
:param verify: Whether to verify SSL certificates. See:
|
259
|
-
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
|
260
|
-
:param botocore_config: Configuration dictionary (key-values) for botocore client. See:
|
261
|
-
https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
|
262
|
-
"""
|
263
|
-
|
264
|
-
SUCCESS_STATES = ("SUCCEEDED",)
|
265
|
-
|
266
|
-
FAILURE_STATES = ("FAILED", "STOPPED", "STOPPING", "TIMEOUT")
|
267
|
-
|
268
|
-
aws_hook_class = GlueDataQualityHook
|
269
|
-
template_fields: Sequence[str] = aws_template_fields("recommendation_run_id")
|
270
|
-
|
271
|
-
def __init__(
|
272
|
-
self,
|
273
|
-
*,
|
274
|
-
recommendation_run_id: str,
|
275
|
-
show_results: bool = True,
|
276
|
-
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
277
|
-
poke_interval: int = 120,
|
278
|
-
max_retries: int = 60,
|
279
|
-
aws_conn_id: str | None = "aws_default",
|
280
|
-
**kwargs,
|
281
|
-
):
|
282
|
-
super().__init__(**kwargs)
|
283
|
-
self.recommendation_run_id = recommendation_run_id
|
284
|
-
self.show_results = show_results
|
285
|
-
self.deferrable = deferrable
|
286
|
-
self.poke_interval = poke_interval
|
287
|
-
self.max_retries = max_retries
|
288
|
-
self.aws_conn_id = aws_conn_id
|
289
|
-
|
290
|
-
def execute(self, context: Context) -> Any:
|
291
|
-
if self.deferrable:
|
292
|
-
self.defer(
|
293
|
-
trigger=GlueDataQualityRuleRecommendationRunCompleteTrigger(
|
294
|
-
recommendation_run_id=self.recommendation_run_id,
|
295
|
-
waiter_delay=int(self.poke_interval),
|
296
|
-
waiter_max_attempts=self.max_retries,
|
297
|
-
aws_conn_id=self.aws_conn_id,
|
298
|
-
),
|
299
|
-
method_name="execute_complete",
|
300
|
-
)
|
301
|
-
else:
|
302
|
-
super().execute(context=context)
|
303
|
-
|
304
|
-
def execute_complete(self, context: Context, event: dict[str, Any] | None = None) -> None:
|
305
|
-
event = validate_execute_complete_event(event)
|
306
|
-
|
307
|
-
if event["status"] != "success":
|
308
|
-
message = f"Error: AWS Glue data quality recommendation run: {event}"
|
309
|
-
if self.soft_fail:
|
310
|
-
raise AirflowSkipException(message)
|
311
|
-
raise AirflowException(message)
|
312
|
-
|
313
|
-
if self.show_results:
|
314
|
-
self.hook.log_recommendation_results(run_id=self.recommendation_run_id)
|
315
|
-
|
316
|
-
self.log.info("AWS Glue data quality recommendation run completed.")
|
317
|
-
|
318
|
-
def poke(self, context: Context) -> bool:
|
319
|
-
self.log.info(
|
320
|
-
"Poking for AWS Glue data quality recommendation run RunId: %s", self.recommendation_run_id
|
321
|
-
)
|
322
|
-
|
323
|
-
response = self.hook.conn.get_data_quality_rule_recommendation_run(RunId=self.recommendation_run_id)
|
324
|
-
|
325
|
-
status = response.get("Status")
|
326
|
-
|
327
|
-
if status in self.SUCCESS_STATES:
|
328
|
-
if self.show_results:
|
329
|
-
self.hook.log_recommendation_results(run_id=self.recommendation_run_id)
|
330
|
-
|
331
|
-
self.log.info(
|
332
|
-
"AWS Glue data quality recommendation run completed RunId: %s Run State: %s",
|
333
|
-
self.recommendation_run_id,
|
334
|
-
response["Status"],
|
335
|
-
)
|
336
|
-
|
337
|
-
return True
|
338
|
-
|
339
|
-
elif status in self.FAILURE_STATES:
|
340
|
-
job_error_message = (
|
341
|
-
f"Error: AWS Glue data quality recommendation run RunId: {self.recommendation_run_id} Run "
|
342
|
-
f"Status: {status}"
|
343
|
-
f": {response.get('ErrorString')}"
|
344
|
-
)
|
345
|
-
self.log.info(job_error_message)
|
346
|
-
# TODO: remove this if block when min_airflow_version is set to higher than 2.7.1
|
347
|
-
if self.soft_fail:
|
348
|
-
raise AirflowSkipException(job_error_message)
|
349
|
-
raise AirflowException(job_error_message)
|
350
|
-
else:
|
351
|
-
return False
|
@@ -78,11 +78,6 @@ class S3KeySensor(BaseSensorOperator):
|
|
78
78
|
CA cert bundle than the one used by botocore.
|
79
79
|
:param deferrable: Run operator in the deferrable mode
|
80
80
|
:param use_regex: whether to use regex to check bucket
|
81
|
-
:param metadata_keys: List of head_object attributes to gather and send to ``check_fn``.
|
82
|
-
Acceptable values: Any top level attribute returned by s3.head_object. Specify * to return
|
83
|
-
all available attributes.
|
84
|
-
Default value: "Size".
|
85
|
-
If the requested attribute is not found, the key is still included and the value is None.
|
86
81
|
"""
|
87
82
|
|
88
83
|
template_fields: Sequence[str] = ("bucket_key", "bucket_name")
|
@@ -98,7 +93,6 @@ class S3KeySensor(BaseSensorOperator):
|
|
98
93
|
verify: str | bool | None = None,
|
99
94
|
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
|
100
95
|
use_regex: bool = False,
|
101
|
-
metadata_keys: list[str] | None = None,
|
102
96
|
**kwargs,
|
103
97
|
):
|
104
98
|
super().__init__(**kwargs)
|
@@ -110,14 +104,14 @@ class S3KeySensor(BaseSensorOperator):
|
|
110
104
|
self.verify = verify
|
111
105
|
self.deferrable = deferrable
|
112
106
|
self.use_regex = use_regex
|
113
|
-
self.metadata_keys = metadata_keys if metadata_keys else ["Size"]
|
114
107
|
|
115
108
|
def _check_key(self, key):
|
116
109
|
bucket_name, key = S3Hook.get_s3_bucket_key(self.bucket_name, key, "bucket_name", "bucket_key")
|
117
110
|
self.log.info("Poking for key : s3://%s/%s", bucket_name, key)
|
118
111
|
|
119
112
|
"""
|
120
|
-
Set variable `files` which contains a list of dict which contains
|
113
|
+
Set variable `files` which contains a list of dict which contains only the size
|
114
|
+
If needed we might want to add other attributes later
|
121
115
|
Format: [{
|
122
116
|
'Size': int
|
123
117
|
}]
|
@@ -129,21 +123,8 @@ class S3KeySensor(BaseSensorOperator):
|
|
129
123
|
if not key_matches:
|
130
124
|
return False
|
131
125
|
|
132
|
-
# Reduce the set of metadata to
|
133
|
-
files = []
|
134
|
-
for f in key_matches:
|
135
|
-
metadata = {}
|
136
|
-
if "*" in self.metadata_keys:
|
137
|
-
metadata = self.hook.head_object(f["Key"], bucket_name)
|
138
|
-
else:
|
139
|
-
for key in self.metadata_keys:
|
140
|
-
try:
|
141
|
-
metadata[key] = f[key]
|
142
|
-
except KeyError:
|
143
|
-
# supplied key might be from head_object response
|
144
|
-
self.log.info("Key %s not found in response, performing head_object", key)
|
145
|
-
metadata[key] = self.hook.head_object(f["Key"], bucket_name).get(key, None)
|
146
|
-
files.append(metadata)
|
126
|
+
# Reduce the set of metadata to size only
|
127
|
+
files = [{"Size": f["Size"]} for f in key_matches]
|
147
128
|
elif self.use_regex:
|
148
129
|
keys = self.hook.get_file_metadata("", bucket_name)
|
149
130
|
key_matches = [k for k in keys if re.match(pattern=key, string=k["Key"])]
|
@@ -153,18 +134,7 @@ class S3KeySensor(BaseSensorOperator):
|
|
153
134
|
obj = self.hook.head_object(key, bucket_name)
|
154
135
|
if obj is None:
|
155
136
|
return False
|
156
|
-
|
157
|
-
if "*" in self.metadata_keys:
|
158
|
-
metadata = self.hook.head_object(key, bucket_name)
|
159
|
-
|
160
|
-
else:
|
161
|
-
for key in self.metadata_keys:
|
162
|
-
# backwards compatibility with original implementation
|
163
|
-
if key == "Size":
|
164
|
-
metadata[key] = obj.get("ContentLength")
|
165
|
-
else:
|
166
|
-
metadata[key] = obj.get(key, None)
|
167
|
-
files = [metadata]
|
137
|
+
files = [{"Size": obj["ContentLength"]}]
|
168
138
|
|
169
139
|
if self.check_fn is not None:
|
170
140
|
return self.check_fn(files)
|
@@ -19,14 +19,10 @@ from __future__ import annotations
|
|
19
19
|
|
20
20
|
import asyncio
|
21
21
|
from functools import cached_property
|
22
|
-
from typing import
|
22
|
+
from typing import Any, AsyncIterator
|
23
23
|
|
24
|
-
|
25
|
-
from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
|
26
|
-
|
27
|
-
from airflow.providers.amazon.aws.hooks.glue import GlueDataQualityHook, GlueJobHook
|
24
|
+
from airflow.providers.amazon.aws.hooks.glue import GlueJobHook
|
28
25
|
from airflow.providers.amazon.aws.hooks.glue_catalog import GlueCatalogHook
|
29
|
-
from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
|
30
26
|
from airflow.triggers.base import BaseTrigger, TriggerEvent
|
31
27
|
|
32
28
|
|
@@ -152,73 +148,3 @@ class GlueCatalogPartitionTrigger(BaseTrigger):
|
|
152
148
|
break
|
153
149
|
else:
|
154
150
|
await asyncio.sleep(self.waiter_delay)
|
155
|
-
|
156
|
-
|
157
|
-
class GlueDataQualityRuleSetEvaluationRunCompleteTrigger(AwsBaseWaiterTrigger):
|
158
|
-
"""
|
159
|
-
Trigger when a AWS Glue data quality evaluation run complete.
|
160
|
-
|
161
|
-
:param evaluation_run_id: The AWS Glue data quality ruleset evaluation run identifier.
|
162
|
-
:param waiter_delay: The amount of time in seconds to wait between attempts. (default: 60)
|
163
|
-
:param waiter_max_attempts: The maximum number of attempts to be made. (default: 75)
|
164
|
-
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
165
|
-
"""
|
166
|
-
|
167
|
-
def __init__(
|
168
|
-
self,
|
169
|
-
evaluation_run_id: str,
|
170
|
-
waiter_delay: int = 60,
|
171
|
-
waiter_max_attempts: int = 75,
|
172
|
-
aws_conn_id: str | None = "aws_default",
|
173
|
-
):
|
174
|
-
super().__init__(
|
175
|
-
serialized_fields={"evaluation_run_id": evaluation_run_id},
|
176
|
-
waiter_name="data_quality_ruleset_evaluation_run_complete",
|
177
|
-
waiter_args={"RunId": evaluation_run_id},
|
178
|
-
failure_message="AWS Glue data quality ruleset evaluation run failed.",
|
179
|
-
status_message="Status of AWS Glue data quality ruleset evaluation run is",
|
180
|
-
status_queries=["Status"],
|
181
|
-
return_key="evaluation_run_id",
|
182
|
-
return_value=evaluation_run_id,
|
183
|
-
waiter_delay=waiter_delay,
|
184
|
-
waiter_max_attempts=waiter_max_attempts,
|
185
|
-
aws_conn_id=aws_conn_id,
|
186
|
-
)
|
187
|
-
|
188
|
-
def hook(self) -> AwsGenericHook:
|
189
|
-
return GlueDataQualityHook(aws_conn_id=self.aws_conn_id)
|
190
|
-
|
191
|
-
|
192
|
-
class GlueDataQualityRuleRecommendationRunCompleteTrigger(AwsBaseWaiterTrigger):
|
193
|
-
"""
|
194
|
-
Trigger when a AWS Glue data quality recommendation run complete.
|
195
|
-
|
196
|
-
:param recommendation_run_id: The AWS Glue data quality rule recommendation run identifier.
|
197
|
-
:param waiter_delay: The amount of time in seconds to wait between attempts. (default: 60)
|
198
|
-
:param waiter_max_attempts: The maximum number of attempts to be made. (default: 75)
|
199
|
-
:param aws_conn_id: The Airflow connection used for AWS credentials.
|
200
|
-
"""
|
201
|
-
|
202
|
-
def __init__(
|
203
|
-
self,
|
204
|
-
recommendation_run_id: str,
|
205
|
-
waiter_delay: int = 60,
|
206
|
-
waiter_max_attempts: int = 75,
|
207
|
-
aws_conn_id: str | None = "aws_default",
|
208
|
-
):
|
209
|
-
super().__init__(
|
210
|
-
serialized_fields={"recommendation_run_id": recommendation_run_id},
|
211
|
-
waiter_name="data_quality_rule_recommendation_run_complete",
|
212
|
-
waiter_args={"RunId": recommendation_run_id},
|
213
|
-
failure_message="AWS Glue data quality recommendation run failed.",
|
214
|
-
status_message="Status of AWS Glue data quality recommendation run is",
|
215
|
-
status_queries=["Status"],
|
216
|
-
return_key="recommendation_run_id",
|
217
|
-
return_value=recommendation_run_id,
|
218
|
-
waiter_delay=waiter_delay,
|
219
|
-
waiter_max_attempts=waiter_max_attempts,
|
220
|
-
aws_conn_id=aws_conn_id,
|
221
|
-
)
|
222
|
-
|
223
|
-
def hook(self) -> AwsGenericHook:
|
224
|
-
return GlueDataQualityHook(aws_conn_id=self.aws_conn_id)
|
@@ -25,104 +25,6 @@
|
|
25
25
|
"state": "success"
|
26
26
|
}
|
27
27
|
]
|
28
|
-
},
|
29
|
-
"data_quality_ruleset_evaluation_run_complete": {
|
30
|
-
"operation": "GetDataQualityRulesetEvaluationRun",
|
31
|
-
"delay": 60,
|
32
|
-
"maxAttempts": 75,
|
33
|
-
"acceptors": [
|
34
|
-
{
|
35
|
-
"matcher": "path",
|
36
|
-
"argument": "Status",
|
37
|
-
"expected": "STARTING",
|
38
|
-
"state": "retry"
|
39
|
-
},
|
40
|
-
{
|
41
|
-
"matcher": "path",
|
42
|
-
"argument": "Status",
|
43
|
-
"expected": "RUNNING",
|
44
|
-
"state": "retry"
|
45
|
-
},
|
46
|
-
{
|
47
|
-
"matcher": "path",
|
48
|
-
"argument": "Status",
|
49
|
-
"expected": "STOPPING",
|
50
|
-
"state": "failure"
|
51
|
-
},
|
52
|
-
{
|
53
|
-
"matcher": "path",
|
54
|
-
"argument": "Status",
|
55
|
-
"expected": "STOPPED",
|
56
|
-
"state": "failure"
|
57
|
-
},
|
58
|
-
{
|
59
|
-
"matcher": "path",
|
60
|
-
"argument": "Status",
|
61
|
-
"expected": "FAILED",
|
62
|
-
"state": "failure"
|
63
|
-
},
|
64
|
-
{
|
65
|
-
"matcher": "path",
|
66
|
-
"argument": "Status",
|
67
|
-
"expected": "TIMEOUT",
|
68
|
-
"state": "failure"
|
69
|
-
},
|
70
|
-
{
|
71
|
-
"matcher": "path",
|
72
|
-
"argument": "Status",
|
73
|
-
"expected": "SUCCEEDED",
|
74
|
-
"state": "success"
|
75
|
-
}
|
76
|
-
]
|
77
|
-
},
|
78
|
-
"data_quality_rule_recommendation_run_complete": {
|
79
|
-
"operation": "GetDataQualityRuleRecommendationRun",
|
80
|
-
"delay": 60,
|
81
|
-
"maxAttempts": 75,
|
82
|
-
"acceptors": [
|
83
|
-
{
|
84
|
-
"matcher": "path",
|
85
|
-
"argument": "Status",
|
86
|
-
"expected": "STARTING",
|
87
|
-
"state": "retry"
|
88
|
-
},
|
89
|
-
{
|
90
|
-
"matcher": "path",
|
91
|
-
"argument": "Status",
|
92
|
-
"expected": "RUNNING",
|
93
|
-
"state": "retry"
|
94
|
-
},
|
95
|
-
{
|
96
|
-
"matcher": "path",
|
97
|
-
"argument": "Status",
|
98
|
-
"expected": "STOPPING",
|
99
|
-
"state": "failure"
|
100
|
-
},
|
101
|
-
{
|
102
|
-
"matcher": "path",
|
103
|
-
"argument": "Status",
|
104
|
-
"expected": "STOPPED",
|
105
|
-
"state": "failure"
|
106
|
-
},
|
107
|
-
{
|
108
|
-
"matcher": "path",
|
109
|
-
"argument": "Status",
|
110
|
-
"expected": "FAILED",
|
111
|
-
"state": "failure"
|
112
|
-
},
|
113
|
-
{
|
114
|
-
"matcher": "path",
|
115
|
-
"argument": "Status",
|
116
|
-
"expected": "TIMEOUT",
|
117
|
-
"state": "failure"
|
118
|
-
},
|
119
|
-
{
|
120
|
-
"matcher": "path",
|
121
|
-
"argument": "Status",
|
122
|
-
"expected": "SUCCEEDED",
|
123
|
-
"state": "success"
|
124
|
-
}
|
125
|
-
]
|
126
28
|
}
|
127
29
|
}
|
128
30
|
}
|
@@ -94,38 +94,32 @@ def get_provider_info():
|
|
94
94
|
"apache-airflow>=2.7.0",
|
95
95
|
"apache-airflow-providers-common-sql>=1.3.1",
|
96
96
|
"apache-airflow-providers-http",
|
97
|
-
"boto3>=1.
|
98
|
-
"botocore>=1.
|
97
|
+
"boto3>=1.33.0",
|
98
|
+
"botocore>=1.33.0",
|
99
99
|
"inflection>=0.5.1",
|
100
|
-
"watchtower>=
|
100
|
+
"watchtower>=2.0.1,<4",
|
101
101
|
"jsonpath_ng>=1.5.3",
|
102
102
|
"redshift_connector>=2.0.918",
|
103
103
|
"sqlalchemy_redshift>=0.8.6",
|
104
|
-
"asgiref
|
104
|
+
"asgiref",
|
105
105
|
"PyAthena>=3.0.10",
|
106
|
-
"jmespath
|
106
|
+
"jmespath",
|
107
107
|
],
|
108
108
|
"additional-extras": [
|
109
|
-
{
|
110
|
-
|
111
|
-
"dependencies": [
|
112
|
-
'pandas>=1.5.3,<2.2;python_version<"3.12"',
|
113
|
-
'pandas>=2.1.1,<2.2;python_version>="3.12"',
|
114
|
-
],
|
115
|
-
},
|
116
|
-
{"name": "aiobotocore", "dependencies": ["aiobotocore[boto3]>=2.13.0"]},
|
109
|
+
{"name": "pandas", "dependencies": ["pandas>=1.2.5,<2.2"]},
|
110
|
+
{"name": "aiobotocore", "dependencies": ["aiobotocore[boto3]>=2.5.3"]},
|
117
111
|
{"name": "cncf.kubernetes", "dependencies": ["apache-airflow-providers-cncf-kubernetes>=7.2.0"]},
|
118
112
|
{"name": "s3fs", "dependencies": ["s3fs>=2023.10.0"]},
|
119
113
|
{"name": "python3-saml", "dependencies": ["python3-saml>=1.16.0"]},
|
120
114
|
],
|
121
115
|
"devel-dependencies": [
|
122
|
-
"aiobotocore>=2.
|
116
|
+
"aiobotocore>=2.7.0",
|
123
117
|
"aws_xray_sdk>=2.12.0",
|
124
118
|
"moto[cloudformation,glue]>=5.0.0",
|
125
|
-
"mypy-boto3-appflow>=1.
|
126
|
-
"mypy-boto3-rds>=1.
|
127
|
-
"mypy-boto3-redshift-data>=1.
|
128
|
-
"mypy-boto3-s3>=1.
|
119
|
+
"mypy-boto3-appflow>=1.33.0",
|
120
|
+
"mypy-boto3-rds>=1.33.0",
|
121
|
+
"mypy-boto3-redshift-data>=1.33.0",
|
122
|
+
"mypy-boto3-s3>=1.33.0",
|
129
123
|
"s3fs>=2023.10.0",
|
130
124
|
"openapi-schema-validator>=0.6.2",
|
131
125
|
"openapi-spec-validator>=0.7.1",
|