apache-airflow-providers-standard 1.0.0.dev0__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-standard might be problematic. Click here for more details.
- airflow/providers/standard/LICENSE +0 -52
- airflow/providers/standard/__init__.py +23 -1
- airflow/providers/standard/decorators/__init__.py +16 -0
- airflow/providers/standard/decorators/bash.py +121 -0
- airflow/providers/standard/decorators/branch_external_python.py +63 -0
- airflow/providers/standard/decorators/branch_python.py +62 -0
- airflow/providers/standard/decorators/branch_virtualenv.py +62 -0
- airflow/providers/standard/decorators/external_python.py +70 -0
- airflow/providers/standard/decorators/python.py +86 -0
- airflow/providers/standard/decorators/python_virtualenv.py +67 -0
- airflow/providers/standard/decorators/sensor.py +83 -0
- airflow/providers/standard/decorators/short_circuit.py +65 -0
- airflow/providers/standard/get_provider_info.py +80 -7
- airflow/providers/standard/hooks/__init__.py +16 -0
- airflow/providers/standard/hooks/filesystem.py +89 -0
- airflow/providers/standard/hooks/package_index.py +95 -0
- airflow/providers/standard/hooks/subprocess.py +119 -0
- airflow/providers/standard/operators/bash.py +273 -0
- airflow/providers/standard/operators/branch.py +105 -0
- airflow/providers/standard/operators/datetime.py +15 -5
- airflow/providers/standard/operators/empty.py +39 -0
- airflow/providers/standard/operators/latest_only.py +115 -0
- airflow/providers/standard/operators/python.py +1143 -0
- airflow/providers/standard/operators/smooth.py +38 -0
- airflow/providers/standard/operators/trigger_dagrun.py +370 -0
- airflow/providers/standard/operators/weekday.py +19 -9
- airflow/providers/standard/sensors/bash.py +118 -0
- airflow/providers/standard/sensors/date_time.py +32 -8
- airflow/providers/standard/sensors/external_task.py +593 -0
- airflow/providers/standard/sensors/filesystem.py +158 -0
- airflow/providers/standard/sensors/python.py +84 -0
- airflow/providers/standard/sensors/time.py +28 -5
- airflow/providers/standard/sensors/time_delta.py +68 -15
- airflow/providers/standard/sensors/weekday.py +25 -7
- airflow/providers/standard/triggers/__init__.py +16 -0
- airflow/providers/standard/triggers/external_task.py +288 -0
- airflow/providers/standard/triggers/file.py +131 -0
- airflow/providers/standard/triggers/temporal.py +113 -0
- airflow/providers/standard/utils/__init__.py +16 -0
- airflow/providers/standard/utils/python_virtualenv.py +209 -0
- airflow/providers/standard/utils/python_virtualenv_script.jinja2 +82 -0
- airflow/providers/standard/utils/sensor_helper.py +137 -0
- airflow/providers/standard/utils/skipmixin.py +192 -0
- airflow/providers/standard/utils/weekday.py +77 -0
- airflow/providers/standard/version_compat.py +36 -0
- {apache_airflow_providers_standard-1.0.0.dev0.dist-info → apache_airflow_providers_standard-1.0.0rc1.dist-info}/METADATA +12 -31
- apache_airflow_providers_standard-1.0.0rc1.dist-info/RECORD +51 -0
- {apache_airflow_providers_standard-1.0.0.dev0.dist-info → apache_airflow_providers_standard-1.0.0rc1.dist-info}/WHEEL +1 -1
- apache_airflow_providers_standard-1.0.0.dev0.dist-info/RECORD +0 -15
- {apache_airflow_providers_standard-1.0.0.dev0.dist-info → apache_airflow_providers_standard-1.0.0rc1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
import typing
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from asgiref.sync import sync_to_async
|
|
24
|
+
from sqlalchemy import func
|
|
25
|
+
|
|
26
|
+
from airflow.models import DagRun
|
|
27
|
+
from airflow.providers.standard.utils.sensor_helper import _get_count
|
|
28
|
+
from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
|
|
29
|
+
from airflow.triggers.base import BaseTrigger, TriggerEvent
|
|
30
|
+
|
|
31
|
+
if typing.TYPE_CHECKING:
|
|
32
|
+
from datetime import datetime
|
|
33
|
+
|
|
34
|
+
from sqlalchemy.orm import Session
|
|
35
|
+
|
|
36
|
+
from airflow.utils.state import DagRunState
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class WorkflowTrigger(BaseTrigger):
    """
    A trigger to monitor tasks, task group and dag execution in Apache Airflow.

    :param external_dag_id: The ID of the external dag.
    :param run_ids: A list of run ids for the external dag.
    :param execution_dates: A list of execution dates for the external dag. Only serialized
        and used for counting when ``AIRFLOW_V_3_0_PLUS`` is false.
    :param logical_dates: A list of logical dates for the external dag.
    :param external_task_ids: A collection of external task IDs to wait for.
    :param external_task_group_id: The ID of the external task group to wait for.
    :param failed_states: States considered as failed for external tasks.
    :param skipped_states: States considered as skipped for external tasks.
    :param allowed_states: States considered as successful for external tasks.
    :param poke_interval: The interval (in seconds) for poking the external tasks.
    :param soft_fail: If True, the trigger will not fail the entire dag on external task failure.
    """

    def __init__(
        self,
        external_dag_id: str,
        run_ids: list[str] | None = None,
        execution_dates: list[datetime] | None = None,
        logical_dates: list[datetime] | None = None,
        external_task_ids: typing.Collection[str] | None = None,
        external_task_group_id: str | None = None,
        failed_states: typing.Iterable[str] | None = None,
        skipped_states: typing.Iterable[str] | None = None,
        allowed_states: typing.Iterable[str] | None = None,
        poke_interval: float = 2.0,
        soft_fail: bool = False,
        **kwargs,
    ):
        self.external_dag_id = external_dag_id
        self.external_task_ids = external_task_ids
        self.external_task_group_id = external_task_group_id
        self.failed_states = failed_states
        self.skipped_states = skipped_states
        self.allowed_states = allowed_states
        self.run_ids = run_ids
        self.poke_interval = poke_interval
        self.soft_fail = soft_fail
        self.execution_dates = execution_dates
        self.logical_dates = logical_dates
        super().__init__(**kwargs)

    def serialize(self) -> tuple[str, dict[str, Any]]:
        """Serialize the trigger param and module path."""
        data: dict[str, typing.Any] = {
            "external_dag_id": self.external_dag_id,
            "external_task_ids": self.external_task_ids,
            "external_task_group_id": self.external_task_group_id,
            "failed_states": self.failed_states,
            "skipped_states": self.skipped_states,
            "allowed_states": self.allowed_states,
            "poke_interval": self.poke_interval,
            "soft_fail": self.soft_fail,
        }
        # Only the run selectors that ``run`` reads for the detected Airflow version are kept.
        if AIRFLOW_V_3_0_PLUS:
            data["run_ids"] = self.run_ids
            data["logical_dates"] = self.logical_dates
        else:
            data["execution_dates"] = self.execution_dates

        return "airflow.providers.standard.triggers.external_task.WorkflowTrigger", data

    async def run(self) -> typing.AsyncIterator[TriggerEvent]:
        """
        Check periodically tasks, task group or dag status.

        Each iteration checks, in order: the failed states (yielding ``{"status": "failed"}``
        when any record matches), the skipped states (yielding ``{"status": "skipped"}``),
        and finally the allowed states. ``{"status": "success"}`` is only yielded once the
        allowed-state count equals the number of monitored run ids / dates; otherwise the
        trigger sleeps for ``poke_interval`` seconds and checks again.
        """
        if AIRFLOW_V_3_0_PLUS:
            get_count_func = self._get_count_af_3
            run_id_or_dates = (self.run_ids or self.logical_dates) or []
        else:
            get_count_func = self._get_count
            run_id_or_dates = self.execution_dates or []

        while True:
            if self.failed_states:
                failed_count = await get_count_func(self.failed_states)
                if failed_count > 0:
                    yield TriggerEvent({"status": "failed"})
                    return
                # No failure found: fall through to the skipped/allowed checks. Reporting
                # success here would fire before the allowed states were ever verified.
            if self.skipped_states:
                skipped_count = await get_count_func(self.skipped_states)
                if skipped_count > 0:
                    yield TriggerEvent({"status": "skipped"})
                    return
            allowed_count = await get_count_func(self.allowed_states)

            if allowed_count == len(run_id_or_dates):  # type: ignore[arg-type]
                yield TriggerEvent({"status": "success"})
                return
            self.log.info("Sleeping for %s seconds", self.poke_interval)
            await asyncio.sleep(self.poke_interval)

    async def _get_count_af_3(self, states):
        """
        Count records in the given states through ``RuntimeTaskInstance``.

        Task instances are counted when ``external_task_ids`` is set, task-group states are
        fetched and matched when ``external_task_group_id`` is set, and dag runs are counted
        otherwise.

        :param states: task or dag states
        :return: The count of records; for ``external_task_ids`` the task-instance count is
            divided by the number of task ids.
        """
        from airflow.providers.standard.utils.sensor_helper import _get_count_by_matched_states
        from airflow.sdk.execution_time.task_runner import RuntimeTaskInstance

        params = {
            "dag_id": self.external_dag_id,
            "logical_dates": self.logical_dates,
            "run_ids": self.run_ids,
        }
        if self.external_task_ids:
            count = await sync_to_async(RuntimeTaskInstance.get_ti_count)(
                task_ids=self.external_task_ids,  # type: ignore[arg-type]
                states=states,
                **params,
            )
        elif self.external_task_group_id:
            run_id_task_state_map = await sync_to_async(RuntimeTaskInstance.get_task_states)(
                task_group_id=self.external_task_group_id,
                **params,
            )
            count = await sync_to_async(_get_count_by_matched_states)(
                run_id_task_state_map=run_id_task_state_map,
                states=states,
            )
        else:
            count = await sync_to_async(RuntimeTaskInstance.get_dr_count)(
                dag_id=self.external_dag_id,
                logical_dates=self.logical_dates,
                run_ids=self.run_ids,
                states=states,
            )
        if self.external_task_ids:
            return count / len(self.external_task_ids)
        return count

    @sync_to_async
    def _get_count(self, states: typing.Iterable[str] | None) -> int:
        """
        Get the count of records against dttm filter and states.

        Async wrapper for the ``_get_count`` helper imported from ``sensor_helper``.

        :param states: task or dag states
        :return: The count of records.
        """
        return _get_count(
            dttm_filter=self.run_ids if AIRFLOW_V_3_0_PLUS else self.execution_dates,
            external_task_ids=self.external_task_ids,
            external_task_group_id=self.external_task_group_id,
            external_dag_id=self.external_dag_id,
            states=states,
        )
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class DagStateTrigger(BaseTrigger):
    """
    Waits asynchronously for a dag to complete for a specific run_id.

    :param dag_id: The dag_id that contains the task you want to wait for
    :param states: the dag run states to wait for (required, no default)
    :param run_ids: The run_id of dag run.
    :param execution_dates: The execution dates of the dag runs. Used for the database
        filter when ``AIRFLOW_V_3_0_PLUS`` is false and passed as ``logical_dates`` otherwise.
    :param poll_interval: The time interval in seconds to check the state.
        The default value is 5.0 sec.
    """

    def __init__(
        self,
        dag_id: str,
        states: list[DagRunState],
        run_ids: list[str] | None = None,
        execution_dates: list[datetime] | None = None,
        poll_interval: float = 5.0,
    ):
        super().__init__()
        self.dag_id = dag_id
        self.states = states
        self.run_ids = run_ids
        self.execution_dates = execution_dates
        self.poll_interval = poll_interval

    def serialize(self) -> tuple[str, dict[str, typing.Any]]:
        """Serialize DagStateTrigger arguments and classpath."""
        data = {
            "dag_id": self.dag_id,
            "states": self.states,
            "poll_interval": self.poll_interval,
            "run_ids": self.run_ids,
            "execution_dates": self.execution_dates,
        }

        return "airflow.providers.standard.triggers.external_task.DagStateTrigger", data

    async def run(self) -> typing.AsyncIterator[TriggerEvent]:
        """Check periodically if the dag run exists, and has hit one of the states yet, or not."""
        # Expected number of matching dag runs: run ids take precedence over execution dates.
        runs_ids_or_dates = 0
        if self.run_ids:
            runs_ids_or_dates = len(self.run_ids)
        elif self.execution_dates:
            runs_ids_or_dates = len(self.execution_dates)

        if AIRFLOW_V_3_0_PLUS:
            event = await self.validate_count_dags_af_3(runs_ids_or_dates_len=runs_ids_or_dates)
            yield TriggerEvent(event)
            return
        else:
            while True:
                num_dags = await self.count_dags()  # type: ignore[call-arg]
                if num_dags == runs_ids_or_dates:
                    yield TriggerEvent(self.serialize())
                    return
                await asyncio.sleep(self.poll_interval)

    async def validate_count_dags_af_3(self, runs_ids_or_dates_len: int = 0) -> tuple[str, dict[str, Any]]:
        """
        Poll ``RuntimeTaskInstance.get_dr_count`` until it equals ``runs_ids_or_dates_len``.

        :param runs_ids_or_dates_len: number of dag runs expected to be in one of ``states``
        :return: the serialized classpath and arguments; when ``run_ids`` is a list, the
            arguments additionally map every run id to its dag run state.
        """
        from airflow.sdk.execution_time.task_runner import RuntimeTaskInstance

        cls_path, data = self.serialize()

        while True:
            num_dags = await sync_to_async(RuntimeTaskInstance.get_dr_count)(
                dag_id=self.dag_id,
                run_ids=self.run_ids,
                states=self.states,  # type: ignore[arg-type]
                logical_dates=self.execution_dates,
            )
            if num_dags == runs_ids_or_dates_len:
                if isinstance(self.run_ids, list):
                    for run_id in self.run_ids:
                        state = await sync_to_async(RuntimeTaskInstance.get_dagrun_state)(
                            dag_id=self.dag_id,
                            run_id=run_id,
                        )
                        data[run_id] = state
                return cls_path, data
            await asyncio.sleep(self.poll_interval)

    if not AIRFLOW_V_3_0_PLUS:
        from airflow.utils.session import NEW_SESSION, provide_session  # type: ignore[misc]

        @sync_to_async
        @provide_session
        def count_dags(self, *, session: Session = NEW_SESSION) -> int:
            """Count how many dag runs in the database match our criteria."""
            # This method only exists when AIRFLOW_V_3_0_PLUS is false, so a version check
            # here can never select the run-id filter. Choose the filter from the arguments
            # instead: execution dates whenever they were supplied (unchanged behaviour),
            # otherwise the run ids that ``run`` used to size the expected count, so that
            # ``None`` is not handed to ``in_()``.
            if self.run_ids and not self.execution_dates:
                _dag_run_date_condition = DagRun.run_id.in_(self.run_ids)
            else:
                _dag_run_date_condition = DagRun.execution_date.in_(self.execution_dates)
            count = (
                session.query(func.count("*"))  # .count() is inefficient
                .filter(
                    DagRun.dag_id == self.dag_id,
                    DagRun.state.in_(self.states),
                    _dag_run_date_condition,
                )
                .scalar()
            )
            return typing.cast("int", count)
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
import datetime
|
|
21
|
+
import os
|
|
22
|
+
from collections.abc import AsyncIterator
|
|
23
|
+
from glob import glob
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
|
|
27
|
+
|
|
28
|
+
if AIRFLOW_V_3_0_PLUS:
|
|
29
|
+
from airflow.triggers.base import BaseEventTrigger, BaseTrigger, TriggerEvent
|
|
30
|
+
else:
|
|
31
|
+
from airflow.triggers.base import ( # type: ignore
|
|
32
|
+
BaseTrigger,
|
|
33
|
+
BaseTrigger as BaseEventTrigger,
|
|
34
|
+
TriggerEvent,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class FileTrigger(BaseTrigger):
    """
    A trigger that fires exactly once after it finds the requested file or folder.

    :param filepath: File or folder path to look for. It is handed to ``glob`` as-is, so it
        can be a glob pattern.
    :param recursive: when set to ``True``, enables recursive directory matching behavior of
        ``**`` in glob filepath parameter. Defaults to ``False``.
    :param poke_interval: Time that the job should wait in between each try
    :param kwargs: accepted for call compatibility; not used by this trigger.
    """

    def __init__(
        self,
        filepath: str,
        recursive: bool = False,
        poke_interval: float = 5.0,
        **kwargs,
    ):
        super().__init__()
        self.filepath = filepath
        self.recursive = recursive
        self.poke_interval = poke_interval

    def serialize(self) -> tuple[str, dict[str, Any]]:
        """Serialize FileTrigger arguments and classpath."""
        return (
            "airflow.providers.standard.triggers.file.FileTrigger",
            {
                "filepath": self.filepath,
                "recursive": self.recursive,
                "poke_interval": self.poke_interval,
            },
        )

    async def run(self) -> AsyncIterator[TriggerEvent]:
        """
        Loop until the relevant files are found.

        Fires ``TriggerEvent(True)`` as soon as a glob match is a regular file, or is a
        directory tree that contains at least one file; otherwise sleeps for
        ``poke_interval`` seconds and looks again.
        """
        while True:
            for path in glob(self.filepath, recursive=self.recursive):
                if os.path.isfile(path):
                    mod_time_f = os.path.getmtime(path)
                    mod_time = datetime.datetime.fromtimestamp(mod_time_f).strftime("%Y%m%d%H%M%S")
                    self.log.info("Found File %s last modified: %s", path, mod_time)
                    yield TriggerEvent(True)
                    return
                # Walk the matched path rather than the raw ``filepath``: when ``filepath``
                # is a glob pattern it is not itself a directory, so walking it finds nothing.
                for _, _, files in os.walk(path):
                    if files:
                        yield TriggerEvent(True)
                        return
            await asyncio.sleep(self.poke_interval)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class FileDeleteTrigger(BaseEventTrigger):
    """
    A trigger that fires exactly once after it finds the requested file and then deletes it.

    Unlike ``FileTrigger``, ``FileDeleteTrigger`` only looks for one specific file: the
    path is checked with ``os.path.isfile`` and no glob expansion is performed.

    :param filepath: Path of the file to wait for and remove.
    :param poke_interval: Time that the job should wait in between each try
    :param kwargs: accepted for call compatibility; not used by this trigger.
    """

    def __init__(self, filepath: str, poke_interval: float = 5.0, **kwargs):
        super().__init__()
        self.poke_interval = poke_interval
        self.filepath = filepath

    def serialize(self) -> tuple[str, dict[str, Any]]:
        """Serialize FileDeleteTrigger arguments and classpath."""
        classpath = "airflow.providers.standard.triggers.file.FileDeleteTrigger"
        arguments: dict[str, Any] = {
            "filepath": self.filepath,
            "poke_interval": self.poke_interval,
        }
        return classpath, arguments

    async def run(self) -> AsyncIterator[TriggerEvent]:
        """Loop until the relevant file is found, then delete it and fire the event."""
        # Wait phase: keep sleeping until the path shows up as a regular file.
        while not os.path.isfile(self.filepath):
            await asyncio.sleep(self.poke_interval)

        # Found phase: log the modification time, remove the file, emit the single event.
        modified_at = datetime.datetime.fromtimestamp(os.path.getmtime(self.filepath))
        self.log.info(
            "Found file %s last modified: %s",
            self.filepath,
            modified_at.strftime("%Y%m%d%H%M%S"),
        )
        os.remove(self.filepath)
        self.log.info("File %s has been deleted", self.filepath)
        yield TriggerEvent(True)
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
import datetime
|
|
21
|
+
from collections.abc import AsyncIterator
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
import pendulum
|
|
25
|
+
|
|
26
|
+
from airflow.exceptions import AirflowException
|
|
27
|
+
from airflow.providers.standard.version_compat import AIRFLOW_V_2_10_PLUS
|
|
28
|
+
from airflow.triggers.base import BaseTrigger, TriggerEvent
|
|
29
|
+
from airflow.utils import timezone
|
|
30
|
+
|
|
31
|
+
if AIRFLOW_V_2_10_PLUS:
|
|
32
|
+
from airflow.triggers.base import TaskSuccessEvent
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class DateTimeTrigger(BaseTrigger):
    """
    Trigger based on a datetime.

    Fires exactly once when the given datetime has been reached, give or take
    a few seconds.

    The provided datetime must be timezone-aware; it is converted to UTC on construction.

    :param moment: when to yield event
    :param end_from_trigger: whether the trigger should mark the task successful after time condition
        reached or resume the task after time condition reached.
    """

    def __init__(self, moment: datetime.datetime, *, end_from_trigger: bool = False) -> None:
        super().__init__()
        if not isinstance(moment, datetime.datetime):
            raise TypeError(f"Expected datetime.datetime type for moment. Got {type(moment)}")
        # Naive datetimes are rejected before the conversion to UTC.
        if moment.tzinfo is None:
            raise ValueError("You cannot pass naive datetimes")
        self.moment: pendulum.DateTime = timezone.convert_to_utc(moment)
        if end_from_trigger and not AIRFLOW_V_2_10_PLUS:
            raise AirflowException("end_from_trigger is only supported in Airflow 2.10 and later. ")

        self.end_from_trigger = end_from_trigger

    def serialize(self) -> tuple[str, dict[str, Any]]:
        """Serialize DateTimeTrigger arguments and classpath."""
        classpath = "airflow.providers.standard.triggers.temporal.DateTimeTrigger"
        arguments = {"moment": self.moment, "end_from_trigger": self.end_from_trigger}
        return classpath, arguments

    async def run(self) -> AsyncIterator[TriggerEvent]:
        """
        Loop until the relevant time is met.

        The wait is done in coarse steps first and one-second steps at the end. The
        remaining time is re-read from the clock after every sleep instead of sleeping
        once for the whole interval, in case the system clock changes unexpectedly or
        handles a DST change poorly.
        """

        def seconds_left() -> float:
            return (self.moment - pendulum.instance(timezone.utcnow())).total_seconds()

        # Sleep in successively smaller increments starting from 1 hour down to 10 seconds at a time
        self.log.info("trigger starting")
        for step in (3600, 60, 10):
            while True:
                seconds_remaining = seconds_left()
                if seconds_remaining <= 2 * step:
                    break
                self.log.info("%d seconds remaining; sleeping %s seconds", seconds_remaining, step)
                await asyncio.sleep(step)
        # Sleep a second at a time otherwise
        while self.moment > pendulum.instance(timezone.utcnow()):
            self.log.info("sleeping 1 second...")
            await asyncio.sleep(1)
        if not self.end_from_trigger:
            self.log.info("yielding event with payload %r", self.moment)
            yield TriggerEvent(self.moment)
            return
        self.log.info("Sensor time condition reached; marking task successful and exiting")
        yield TaskSuccessEvent()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class TimeDeltaTrigger(DateTimeTrigger):
    """
    Create DateTimeTriggers based on delays.

    Convenience subclass that takes a delay instead of an exact moment: the target
    moment is computed as "now (UTC) plus ``delta``" when the trigger is constructed.

    ``serialize`` is inherited unchanged, so an instance serialises as a
    DateTimeTrigger; the two are operationally the same.

    :param delta: how long to wait
    :param end_from_trigger: whether the trigger should mark the task successful after time condition
        reached or resume the task after time condition reached.
    """

    def __init__(self, delta: datetime.timedelta, *, end_from_trigger: bool = False) -> None:
        fire_at = timezone.utcnow() + delta
        super().__init__(moment=fire_at, end_from_trigger=end_from_trigger)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|