apache-airflow-providers-standard 0.1.0rc1__py3-none-any.whl → 1.0.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-standard might be problematic. Click here for more details.
- airflow/providers/standard/LICENSE +52 -0
- airflow/providers/standard/__init__.py +1 -23
- airflow/providers/standard/get_provider_info.py +7 -52
- airflow/providers/standard/operators/bash.py +28 -82
- airflow/providers/standard/operators/datetime.py +3 -8
- airflow/providers/standard/operators/weekday.py +4 -11
- airflow/providers/standard/sensors/bash.py +5 -11
- airflow/providers/standard/sensors/date_time.py +8 -32
- airflow/providers/standard/sensors/time.py +5 -28
- airflow/providers/standard/sensors/time_delta.py +10 -48
- airflow/providers/standard/sensors/weekday.py +2 -7
- {apache_airflow_providers_standard-0.1.0rc1.dist-info → apache_airflow_providers_standard-1.0.0.dev1.dist-info}/METADATA +36 -20
- apache_airflow_providers_standard-1.0.0.dev1.dist-info/RECORD +17 -0
- {apache_airflow_providers_standard-0.1.0rc1.dist-info → apache_airflow_providers_standard-1.0.0.dev1.dist-info}/WHEEL +1 -1
- airflow/providers/standard/hooks/__init__.py +0 -16
- airflow/providers/standard/hooks/filesystem.py +0 -89
- airflow/providers/standard/hooks/package_index.py +0 -95
- airflow/providers/standard/hooks/subprocess.py +0 -119
- airflow/providers/standard/operators/empty.py +0 -39
- airflow/providers/standard/operators/generic_transfer.py +0 -138
- airflow/providers/standard/operators/latest_only.py +0 -83
- airflow/providers/standard/operators/python.py +0 -1132
- airflow/providers/standard/operators/trigger_dagrun.py +0 -292
- airflow/providers/standard/sensors/external_task.py +0 -509
- airflow/providers/standard/sensors/filesystem.py +0 -158
- airflow/providers/standard/sensors/python.py +0 -85
- airflow/providers/standard/triggers/__init__.py +0 -16
- airflow/providers/standard/triggers/external_task.py +0 -211
- airflow/providers/standard/triggers/file.py +0 -131
- airflow/providers/standard/triggers/temporal.py +0 -114
- airflow/providers/standard/utils/__init__.py +0 -16
- airflow/providers/standard/utils/python_virtualenv.py +0 -209
- airflow/providers/standard/utils/python_virtualenv_script.jinja2 +0 -77
- airflow/providers/standard/utils/sensor_helper.py +0 -119
- airflow/providers/standard/version_compat.py +0 -36
- apache_airflow_providers_standard-0.1.0rc1.dist-info/RECORD +0 -38
- {apache_airflow_providers_standard-0.1.0rc1.dist-info → apache_airflow_providers_standard-1.0.0.dev1.dist-info}/entry_points.txt +0 -0
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
# or more contributor license agreements. See the NOTICE file
|
|
4
|
-
# distributed with this work for additional information
|
|
5
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
# to you under the Apache License, Version 2.0 (the
|
|
7
|
-
# "License"); you may not use this file except in compliance
|
|
8
|
-
# with the License. You may obtain a copy of the License at
|
|
9
|
-
#
|
|
10
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
#
|
|
12
|
-
# Unless required by applicable law or agreed to in writing,
|
|
13
|
-
# software distributed under the License is distributed on an
|
|
14
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
# KIND, either express or implied. See the License for the
|
|
16
|
-
# specific language governing permissions and limitations
|
|
17
|
-
# under the License.
|
|
18
|
-
from __future__ import annotations
|
|
19
|
-
|
|
20
|
-
from collections.abc import Mapping, Sequence
|
|
21
|
-
from typing import TYPE_CHECKING, Any, Callable
|
|
22
|
-
|
|
23
|
-
from airflow.sensors.base import BaseSensorOperator, PokeReturnValue
|
|
24
|
-
from airflow.utils.context import context_merge
|
|
25
|
-
from airflow.utils.operator_helpers import determine_kwargs
|
|
26
|
-
|
|
27
|
-
if TYPE_CHECKING:
|
|
28
|
-
try:
|
|
29
|
-
from airflow.sdk.definitions.context import Context
|
|
30
|
-
except ImportError:
|
|
31
|
-
# TODO: Remove once provider drops support for Airflow 2
|
|
32
|
-
from airflow.utils.context import Context
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
class PythonSensor(BaseSensorOperator):
    """
    Sensor that calls a Python callable on every poke and reports its result.

    A ``PokeReturnValue`` returned by the callable is passed through as-is; any
    other result is converted with ``bool()`` and wrapped in a ``PokeReturnValue``.

    Values can be handed to the callable through ``templates_dict``,
    e.g. ``templates_dict = {'start_ds': 1970}``, and read inside the callable
    as ``kwargs['templates_dict']['start_ds']``.

    :param python_callable: A reference to an object that is callable
    :param op_kwargs: a dictionary of keyword arguments that will get unpacked
        in your function
    :param op_args: a list of positional arguments that will get unpacked when
        calling your callable
    :param templates_dict: a dictionary where the values are templates that
        will get templated by the Airflow engine sometime between
        ``__init__`` and ``execute`` takes place and are made available
        in your callable's context after the template has been applied.

    .. seealso::
        For more information on how to use this sensor, take a look at the guide:
        :ref:`howto/operator:PythonSensor`
    """

    template_fields: Sequence[str] = ("templates_dict", "op_args", "op_kwargs")

    def __init__(
        self,
        *,
        python_callable: Callable,
        op_args: list | None = None,
        op_kwargs: Mapping[str, Any] | None = None,
        templates_dict: dict | None = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.python_callable = python_callable
        # Falsy inputs (``None`` or empty) are replaced by fresh empty containers.
        self.op_args = op_args if op_args else []
        self.op_kwargs = op_kwargs if op_kwargs else {}
        self.templates_dict = templates_dict

    def poke(self, context: Context) -> PokeReturnValue | bool:
        """
        Call ``python_callable`` once and return its outcome.

        :param context: the task context; it is passed to ``context_merge``
            together with ``op_kwargs`` and ``templates_dict`` before the call.
        :return: the callable's own ``PokeReturnValue`` if it returned one,
            otherwise ``PokeReturnValue(bool(result))``.
        """
        context_merge(context, self.op_kwargs, templates_dict=self.templates_dict)
        # NOTE: the stored ``op_kwargs`` is overwritten with whatever
        # ``determine_kwargs`` returns, so later pokes start from that value.
        self.op_kwargs = determine_kwargs(self.python_callable, self.op_args, context)

        self.log.info("Poking callable: %s", str(self.python_callable))
        outcome = self.python_callable(*self.op_args, **self.op_kwargs)
        if not isinstance(outcome, PokeReturnValue):
            outcome = PokeReturnValue(bool(outcome))
        return outcome
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
|
3
|
-
# distributed with this work for additional information
|
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
|
6
|
-
# "License"); you may not use this file except in compliance
|
|
7
|
-
# with the License. You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
|
12
|
-
# software distributed under the License is distributed on an
|
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
-
# KIND, either express or implied. See the License for the
|
|
15
|
-
# specific language governing permissions and limitations
|
|
16
|
-
# under the License.
|
|
@@ -1,211 +0,0 @@
|
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
|
3
|
-
# distributed with this work for additional information
|
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
|
6
|
-
# "License"); you may not use this file except in compliance
|
|
7
|
-
# with the License. You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
|
12
|
-
# software distributed under the License is distributed on an
|
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
-
# KIND, either express or implied. See the License for the
|
|
15
|
-
# specific language governing permissions and limitations
|
|
16
|
-
# under the License.
|
|
17
|
-
from __future__ import annotations
|
|
18
|
-
|
|
19
|
-
import asyncio
|
|
20
|
-
import typing
|
|
21
|
-
from typing import Any
|
|
22
|
-
|
|
23
|
-
from asgiref.sync import sync_to_async
|
|
24
|
-
from sqlalchemy import func
|
|
25
|
-
|
|
26
|
-
from airflow.models import DagRun
|
|
27
|
-
from airflow.providers.standard.utils.sensor_helper import _get_count
|
|
28
|
-
from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
|
|
29
|
-
from airflow.triggers.base import BaseTrigger, TriggerEvent
|
|
30
|
-
from airflow.utils.session import NEW_SESSION, provide_session
|
|
31
|
-
|
|
32
|
-
if typing.TYPE_CHECKING:
|
|
33
|
-
from datetime import datetime
|
|
34
|
-
|
|
35
|
-
from sqlalchemy.orm import Session
|
|
36
|
-
|
|
37
|
-
from airflow.utils.state import DagRunState
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class WorkflowTrigger(BaseTrigger):
    """
    A trigger to monitor tasks, task group and dag execution in Apache Airflow.

    :param external_dag_id: The ID of the external dag.
    :param run_ids: A list of run ids for the external dag. Used as the lookup
        filter when ``AIRFLOW_V_3_0_PLUS`` is true.
    :param execution_dates: A list of execution dates for the external dag. Used
        as the lookup filter when ``AIRFLOW_V_3_0_PLUS`` is false.
    :param external_task_ids: A collection of external task IDs to wait for.
    :param external_task_group_id: The ID of the external task group to wait for.
    :param failed_states: States considered as failed for external tasks.
    :param skipped_states: States considered as skipped for external tasks.
    :param allowed_states: States considered as successful for external tasks.
    :param poke_interval: The interval (in seconds) for poking the external tasks.
    :param soft_fail: If True, the trigger will not fail the entire dag on external task failure.
    """

    def __init__(
        self,
        external_dag_id: str,
        run_ids: list[str] | None = None,
        execution_dates: list[datetime] | None = None,
        external_task_ids: typing.Collection[str] | None = None,
        external_task_group_id: str | None = None,
        failed_states: typing.Iterable[str] | None = None,
        skipped_states: typing.Iterable[str] | None = None,
        allowed_states: typing.Iterable[str] | None = None,
        poke_interval: float = 2.0,
        soft_fail: bool = False,
        **kwargs,
    ):
        self.external_dag_id = external_dag_id
        self.external_task_ids = external_task_ids
        self.external_task_group_id = external_task_group_id
        self.failed_states = failed_states
        self.skipped_states = skipped_states
        self.allowed_states = allowed_states
        self.run_ids = run_ids
        self.poke_interval = poke_interval
        self.soft_fail = soft_fail
        self.execution_dates = execution_dates
        super().__init__(**kwargs)

    def serialize(self) -> tuple[str, dict[str, Any]]:
        """
        Serialize the trigger param and module path.

        Only one of ``run_ids`` / ``execution_dates`` is included, chosen by
        ``AIRFLOW_V_3_0_PLUS``.
        """
        data: dict[str, typing.Any] = {
            "external_dag_id": self.external_dag_id,
            "external_task_ids": self.external_task_ids,
            "external_task_group_id": self.external_task_group_id,
            "failed_states": self.failed_states,
            "skipped_states": self.skipped_states,
            "allowed_states": self.allowed_states,
            "poke_interval": self.poke_interval,
            "soft_fail": self.soft_fail,
        }
        if AIRFLOW_V_3_0_PLUS:
            data["run_ids"] = self.run_ids
        else:
            data["execution_dates"] = self.execution_dates

        return "airflow.providers.standard.triggers.external_task.WorkflowTrigger", data

    async def run(self) -> typing.AsyncIterator[TriggerEvent]:
        """
        Check periodically tasks, task group or dag status.

        Each iteration checks, in order: failed states (emits ``"failed"``),
        skipped states (emits ``"skipped"``), then allowed states (emits
        ``"success"`` once the count equals the number of run ids / execution
        dates). If none of these fire, it sleeps ``poke_interval`` seconds and
        polls again.
        """
        while True:
            if self.failed_states:
                failed_count = await self._get_count(self.failed_states)
                if failed_count > 0:
                    yield TriggerEvent({"status": "failed"})
                    return
                # No failure seen is not the same as success: fall through so
                # the skipped/allowed checks below decide, instead of firing a
                # premature "success" event while the external work is still
                # running.
            if self.skipped_states:
                skipped_count = await self._get_count(self.skipped_states)
                if skipped_count > 0:
                    yield TriggerEvent({"status": "skipped"})
                    return
            allowed_count = await self._get_count(self.allowed_states)
            _dates = self.run_ids if AIRFLOW_V_3_0_PLUS else self.execution_dates
            if allowed_count == len(_dates):  # type: ignore[arg-type]
                yield TriggerEvent({"status": "success"})
                return
            self.log.info("Sleeping for %s seconds", self.poke_interval)
            await asyncio.sleep(self.poke_interval)

    @sync_to_async
    def _get_count(self, states: typing.Iterable[str] | None) -> int:
        """
        Get the count of records against dttm filter and states.

        Awaitable wrapper around the module-level ``_get_count`` helper.

        :param states: task or dag states
        :return: The count of records.
        """
        return _get_count(
            dttm_filter=self.run_ids if AIRFLOW_V_3_0_PLUS else self.execution_dates,
            external_task_ids=self.external_task_ids,
            external_task_group_id=self.external_task_group_id,
            external_dag_id=self.external_dag_id,
            states=states,
        )
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
class DagStateTrigger(BaseTrigger):
    """
    Wait asynchronously until the requested dag runs have reached one of the given states.

    :param dag_id: The dag_id whose runs are being watched.
    :param states: dag run states that count as a match.
    :param run_ids: The run ids to look for (used when ``AIRFLOW_V_3_0_PLUS`` is true).
    :param execution_dates: The execution dates to look for (used when
        ``AIRFLOW_V_3_0_PLUS`` is false).
    :param poll_interval: The time interval in seconds to check the state.
        The default value is 5.0 sec.
    """

    def __init__(
        self,
        dag_id: str,
        states: list[DagRunState],
        run_ids: list[str] | None = None,
        execution_dates: list[datetime] | None = None,
        poll_interval: float = 5.0,
    ):
        super().__init__()
        self.dag_id = dag_id
        self.states = states
        self.run_ids = run_ids
        self.execution_dates = execution_dates
        self.poll_interval = poll_interval

    def serialize(self) -> tuple[str, dict[str, typing.Any]]:
        """Return the trigger classpath together with its constructor arguments."""
        # Exactly one selector key is emitted, depending on the Airflow version flag.
        if AIRFLOW_V_3_0_PLUS:
            selector = {"run_ids": self.run_ids}
        else:
            selector = {"execution_dates": self.execution_dates}

        payload = {
            "dag_id": self.dag_id,
            "states": self.states,
            "poll_interval": self.poll_interval,
            **selector,
        }
        return "airflow.providers.standard.triggers.external_task.DagStateTrigger", payload

    async def run(self) -> typing.AsyncIterator[TriggerEvent]:
        """Poll until the number of matching dag runs equals the number requested, then fire once."""
        while True:
            # mypy confuses typing here
            matched = await self.count_dags()  # type: ignore[call-arg]
            wanted = self.run_ids if AIRFLOW_V_3_0_PLUS else self.execution_dates
            if matched != len(wanted):  # type: ignore[arg-type]
                await asyncio.sleep(self.poll_interval)
                continue
            yield TriggerEvent(self.serialize())
            return

    @sync_to_async
    @provide_session
    def count_dags(self, *, session: Session = NEW_SESSION) -> int | None:
        """Return how many dag runs match ``dag_id``, ``states`` and the run selector."""
        if AIRFLOW_V_3_0_PLUS:
            run_selector = DagRun.run_id.in_(self.run_ids)
        else:
            run_selector = DagRun.execution_date.in_(self.execution_dates)

        # func.count("*") is used because .count() is inefficient
        query = session.query(func.count("*")).filter(
            DagRun.dag_id == self.dag_id,
            DagRun.state.in_(self.states),
            run_selector,
        )
        return typing.cast(int, query.scalar())
|
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
|
3
|
-
# distributed with this work for additional information
|
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
|
6
|
-
# "License"); you may not use this file except in compliance
|
|
7
|
-
# with the License. You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
|
12
|
-
# software distributed under the License is distributed on an
|
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
-
# KIND, either express or implied. See the License for the
|
|
15
|
-
# specific language governing permissions and limitations
|
|
16
|
-
# under the License.
|
|
17
|
-
from __future__ import annotations
|
|
18
|
-
|
|
19
|
-
import asyncio
|
|
20
|
-
import datetime
|
|
21
|
-
import os
|
|
22
|
-
from collections.abc import AsyncIterator
|
|
23
|
-
from glob import glob
|
|
24
|
-
from typing import Any
|
|
25
|
-
|
|
26
|
-
from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
|
|
27
|
-
|
|
28
|
-
if AIRFLOW_V_3_0_PLUS:
|
|
29
|
-
from airflow.triggers.base import BaseEventTrigger, BaseTrigger, TriggerEvent
|
|
30
|
-
else:
|
|
31
|
-
from airflow.triggers.base import ( # type: ignore
|
|
32
|
-
BaseTrigger,
|
|
33
|
-
BaseTrigger as BaseEventTrigger,
|
|
34
|
-
TriggerEvent,
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class FileTrigger(BaseTrigger):
    """
    A trigger that fires exactly once after it finds the requested file or folder.

    :param filepath: File or folder name (relative to the base path set within the connection), can
        be a glob.
    :param recursive: when set to ``True``, enables recursive directory matching behavior of
        ``**`` in glob filepath parameter. Defaults to ``False``.
    :param poke_interval: Time that the job should wait in between each try
    :param kwargs: accepted for call compatibility; not used by this constructor.
    """

    def __init__(
        self,
        filepath: str,
        recursive: bool = False,
        poke_interval: float = 5.0,
        **kwargs,
    ):
        super().__init__()
        self.filepath = filepath
        self.recursive = recursive
        self.poke_interval = poke_interval

    def serialize(self) -> tuple[str, dict[str, Any]]:
        """Serialize FileTrigger arguments and classpath."""
        return (
            "airflow.providers.standard.triggers.file.FileTrigger",
            {
                "filepath": self.filepath,
                "recursive": self.recursive,
                "poke_interval": self.poke_interval,
            },
        )

    async def run(self) -> AsyncIterator[TriggerEvent]:
        """
        Loop until the relevant files are found.

        A glob match fires the trigger when it is a regular file, or when it is
        a directory that contains at least one file at any depth. Otherwise the
        trigger sleeps ``poke_interval`` seconds and looks again.
        """
        while True:
            for path in glob(self.filepath, recursive=self.recursive):
                if os.path.isfile(path):
                    mod_time_f = os.path.getmtime(path)
                    mod_time = datetime.datetime.fromtimestamp(mod_time_f).strftime("%Y%m%d%H%M%S")
                    self.log.info("Found File %s last modified: %s", path, mod_time)
                    yield TriggerEvent(True)
                    return
                # Walk the matched path, not the raw ``filepath``: when
                # ``filepath`` is a glob pattern, os.walk() on the pattern
                # string itself yields nothing, so matched directories would
                # never be detected.
                for _, _, files in os.walk(path):
                    if files:
                        yield TriggerEvent(True)
                        return
            await asyncio.sleep(self.poke_interval)
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
class FileDeleteTrigger(BaseEventTrigger):
    """
    A trigger that fires exactly once after it finds the requested file, deleting the file first.

    Unlike ``FileTrigger``, ``FileDeleteTrigger`` only looks for one specific file:
    the path is checked as given, with no glob expansion or directory walk.

    :param filepath: File (relative to the base path set within the connection).
    :param poke_interval: Time that the job should wait in between each try
    :param kwargs: accepted for call compatibility; not used by this constructor.
    """

    def __init__(
        self,
        filepath: str,
        poke_interval: float = 5.0,
        **kwargs,
    ):
        super().__init__()
        self.filepath = filepath
        self.poke_interval = poke_interval

    def serialize(self) -> tuple[str, dict[str, Any]]:
        """Return the trigger classpath together with its constructor arguments."""
        arguments = {
            "filepath": self.filepath,
            "poke_interval": self.poke_interval,
        }
        return ("airflow.providers.standard.triggers.file.FileDeleteTrigger", arguments)

    async def run(self) -> AsyncIterator[TriggerEvent]:
        """Wait for the file to exist, log its modification time, remove it, then fire once."""
        # Keep polling until the path shows up as a regular file.
        while not os.path.isfile(self.filepath):
            await asyncio.sleep(self.poke_interval)

        modified_at = datetime.datetime.fromtimestamp(os.path.getmtime(self.filepath))
        mod_time = modified_at.strftime("%Y%m%d%H%M%S")
        self.log.info("Found file %s last modified: %s", self.filepath, mod_time)
        os.remove(self.filepath)
        self.log.info("File %s has been deleted", self.filepath)
        yield TriggerEvent(True)
|
|
@@ -1,114 +0,0 @@
|
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
|
3
|
-
# distributed with this work for additional information
|
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
|
6
|
-
# "License"); you may not use this file except in compliance
|
|
7
|
-
# with the License. You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
|
12
|
-
# software distributed under the License is distributed on an
|
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
-
# KIND, either express or implied. See the License for the
|
|
15
|
-
# specific language governing permissions and limitations
|
|
16
|
-
# under the License.
|
|
17
|
-
from __future__ import annotations
|
|
18
|
-
|
|
19
|
-
import asyncio
|
|
20
|
-
import datetime
|
|
21
|
-
from collections.abc import AsyncIterator
|
|
22
|
-
from typing import Any
|
|
23
|
-
|
|
24
|
-
import pendulum
|
|
25
|
-
|
|
26
|
-
from airflow.exceptions import AirflowException
|
|
27
|
-
from airflow.providers.standard.version_compat import AIRFLOW_V_2_10_PLUS
|
|
28
|
-
from airflow.triggers.base import BaseTrigger, TriggerEvent
|
|
29
|
-
from airflow.utils import timezone
|
|
30
|
-
|
|
31
|
-
if AIRFLOW_V_2_10_PLUS:
|
|
32
|
-
from airflow.triggers.base import TaskSuccessEvent
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
class DateTimeTrigger(BaseTrigger):
    """
    Trigger that fires once a given point in time has passed.

    The trigger fires exactly once, at the given datetime, give or take
    a few seconds.

    The provided datetime must be timezone-aware; it is converted to UTC on
    construction.

    :param moment: when to yield event
    :param end_from_trigger: whether the trigger should mark the task successful after time condition
        reached or resume the task after time condition reached.
    :raises TypeError: if ``moment`` is not a ``datetime.datetime``.
    :raises ValueError: if ``moment`` is a naive datetime.
    :raises AirflowException: if ``end_from_trigger`` is requested while
        ``AIRFLOW_V_2_10_PLUS`` is false.
    """

    def __init__(self, moment: datetime.datetime, *, end_from_trigger: bool = False) -> None:
        super().__init__()
        if not isinstance(moment, datetime.datetime):
            raise TypeError(f"Expected datetime.datetime type for moment. Got {type(moment)}")
        if moment.tzinfo is None:
            raise ValueError("You cannot pass naive datetimes")

        # Make sure it's in UTC
        self.moment: pendulum.DateTime = timezone.convert_to_utc(moment)

        if end_from_trigger and not AIRFLOW_V_2_10_PLUS:
            raise AirflowException("end_from_trigger is only supported in Airflow 2.10 and later. ")

        self.end_from_trigger = end_from_trigger

    def serialize(self) -> tuple[str, dict[str, Any]]:
        """Return the trigger classpath together with its constructor arguments."""
        arguments = {"moment": self.moment, "end_from_trigger": self.end_from_trigger}
        return ("airflow.providers.standard.triggers.temporal.DateTimeTrigger", arguments)

    async def run(self) -> AsyncIterator[TriggerEvent]:
        """
        Sleep until ``moment`` has passed, then emit a single event.

        The wait is done in coarse steps (1 hour, 1 minute, 10 seconds) followed
        by one-second sleeps, re-reading the clock after each sleep rather than
        sleeping for the whole remaining duration at once. That keeps the
        trigger correct if the system clock changes unexpectedly or a DST
        change is handled poorly.
        """

        def seconds_until_moment() -> float:
            return (self.moment - pendulum.instance(timezone.utcnow())).total_seconds()

        self.log.info("trigger starting")
        # Sleep in successively smaller increments starting from 1 hour down to 10 seconds at a time
        for step in (3600, 60, 10):
            while (remaining := seconds_until_moment()) > 2 * step:
                self.log.info("%d seconds remaining; sleeping %s seconds", remaining, step)
                await asyncio.sleep(step)

        # Sleep a second at a time otherwise
        while self.moment > pendulum.instance(timezone.utcnow()):
            self.log.info("sleeping 1 second...")
            await asyncio.sleep(1)

        if not self.end_from_trigger:
            self.log.info("yielding event with payload %r", self.moment)
            yield TriggerEvent(self.moment)
            return

        self.log.info("Sensor time condition reached; marking task successful and exiting")
        yield TaskSuccessEvent()
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
class TimeDeltaTrigger(DateTimeTrigger):
    """
    DateTimeTrigger variant that is configured with a delay instead of a moment.

    The target moment is computed once, at construction time, as the current
    UTC time plus ``delta``.

    Although this is a distinct class, it inherits ``serialize`` unchanged and
    therefore serialises as a DateTimeTrigger, since the two are operationally
    the same.

    :param delta: how long to wait
    :param end_from_trigger: whether the trigger should mark the task successful after time condition
        reached or resume the task after time condition reached.
    """

    def __init__(self, delta: datetime.timedelta, *, end_from_trigger: bool = False) -> None:
        target_moment = timezone.utcnow() + delta
        super().__init__(moment=target_moment, end_from_trigger=end_from_trigger)
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
|
3
|
-
# distributed with this work for additional information
|
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
|
6
|
-
# "License"); you may not use this file except in compliance
|
|
7
|
-
# with the License. You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
|
12
|
-
# software distributed under the License is distributed on an
|
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
-
# KIND, either express or implied. See the License for the
|
|
15
|
-
# specific language governing permissions and limitations
|
|
16
|
-
# under the License.
|