apache-airflow-providers-standard 0.0.2rc1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airflow/providers/standard/__init__.py +3 -3
- airflow/providers/standard/get_provider_info.py +15 -3
- airflow/providers/standard/operators/bash.py +17 -10
- airflow/providers/standard/operators/latest_only.py +78 -0
- airflow/providers/standard/operators/python.py +33 -53
- airflow/providers/standard/operators/trigger_dagrun.py +1 -10
- airflow/providers/standard/sensors/date_time.py +3 -3
- airflow/providers/standard/sensors/external_task.py +512 -0
- airflow/providers/standard/sensors/filesystem.py +18 -3
- airflow/providers/standard/sensors/time.py +3 -3
- airflow/providers/standard/sensors/time_delta.py +22 -3
- airflow/providers/standard/{utils/version_references.py → triggers/__init__.py} +0 -10
- airflow/providers/standard/triggers/external_task.py +216 -0
- airflow/providers/standard/triggers/file.py +77 -0
- airflow/providers/standard/triggers/temporal.py +114 -0
- airflow/providers/standard/utils/python_virtualenv_script.jinja2 +2 -2
- airflow/providers/standard/utils/sensor_helper.py +123 -0
- airflow/providers/standard/version_compat.py +36 -0
- {apache_airflow_providers_standard-0.0.2rc1.dist-info → apache_airflow_providers_standard-0.0.3.dist-info}/METADATA +10 -10
- apache_airflow_providers_standard-0.0.3.dist-info/RECORD +37 -0
- apache_airflow_providers_standard-0.0.2rc1.dist-info/RECORD +0 -30
- {apache_airflow_providers_standard-0.0.2rc1.dist-info → apache_airflow_providers_standard-0.0.3.dist-info}/WHEEL +0 -0
- {apache_airflow_providers_standard-0.0.2rc1.dist-info → apache_airflow_providers_standard-0.0.3.dist-info}/entry_points.txt +0 -0
airflow/providers/standard/triggers/external_task.py
@@ -0,0 +1,216 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import asyncio
+import typing
+from typing import Any
+
+from asgiref.sync import sync_to_async
+from sqlalchemy import func
+
+from airflow.models import DagRun
+from airflow.providers.standard.utils.sensor_helper import _get_count
+from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
+from airflow.triggers.base import BaseTrigger, TriggerEvent
+from airflow.utils.session import NEW_SESSION, provide_session
+
+if typing.TYPE_CHECKING:
+    from datetime import datetime
+
+    from sqlalchemy.orm import Session
+
+    from airflow.utils.state import DagRunState
+
+
+class WorkflowTrigger(BaseTrigger):
+    """
+    A trigger to monitor tasks, task group and dag execution in Apache Airflow.
+
+    :param external_dag_id: The ID of the external DAG.
+    :param logical_dates: A list of logical dates for the external DAG.
+    :param external_task_ids: A collection of external task IDs to wait for.
+    :param external_task_group_id: The ID of the external task group to wait for.
+    :param failed_states: States considered as failed for external tasks.
+    :param skipped_states: States considered as skipped for external tasks.
+    :param allowed_states: States considered as successful for external tasks.
+    :param poke_interval: The interval (in seconds) for poking the external tasks.
+    :param soft_fail: If True, the trigger will not fail the entire DAG on external task failure.
+    """
+
+    def __init__(
+        self,
+        external_dag_id: str,
+        logical_dates: list[datetime] | None = None,
+        execution_dates: list[datetime] | None = None,
+        external_task_ids: typing.Collection[str] | None = None,
+        external_task_group_id: str | None = None,
+        failed_states: typing.Iterable[str] | None = None,
+        skipped_states: typing.Iterable[str] | None = None,
+        allowed_states: typing.Iterable[str] | None = None,
+        poke_interval: float = 2.0,
+        soft_fail: bool = False,
+        **kwargs,
+    ):
+        self.external_dag_id = external_dag_id
+        self.external_task_ids = external_task_ids
+        self.external_task_group_id = external_task_group_id
+        self.failed_states = failed_states
+        self.skipped_states = skipped_states
+        self.allowed_states = allowed_states
+        self.logical_dates = logical_dates
+        self.poke_interval = poke_interval
+        self.soft_fail = soft_fail
+        self.execution_dates = execution_dates
+        super().__init__(**kwargs)
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        """Serialize the trigger param and module path."""
+        _dates = (
+            {"logical_dates": self.logical_dates}
+            if AIRFLOW_V_3_0_PLUS
+            else {"execution_dates": self.execution_dates}
+        )
+        return (
+            "airflow.providers.standard.triggers.external_task.WorkflowTrigger",
+            {
+                "external_dag_id": self.external_dag_id,
+                "external_task_ids": self.external_task_ids,
+                "external_task_group_id": self.external_task_group_id,
+                "failed_states": self.failed_states,
+                "skipped_states": self.skipped_states,
+                "allowed_states": self.allowed_states,
+                **_dates,
+                "poke_interval": self.poke_interval,
+                "soft_fail": self.soft_fail,
+            },
+        )
+
+    async def run(self) -> typing.AsyncIterator[TriggerEvent]:
+        """Check periodically tasks, task group or dag status."""
+        while True:
+            if self.failed_states:
+                failed_count = await self._get_count(self.failed_states)
+                if failed_count > 0:
+                    yield TriggerEvent({"status": "failed"})
+                    return
+                else:
+                    yield TriggerEvent({"status": "success"})
+                    return
+            if self.skipped_states:
+                skipped_count = await self._get_count(self.skipped_states)
+                if skipped_count > 0:
+                    yield TriggerEvent({"status": "skipped"})
+                    return
+            allowed_count = await self._get_count(self.allowed_states)
+            _dates = self.logical_dates if AIRFLOW_V_3_0_PLUS else self.execution_dates
+            if allowed_count == len(_dates):  # type: ignore[arg-type]
+                yield TriggerEvent({"status": "success"})
+                return
+            self.log.info("Sleeping for %s seconds", self.poke_interval)
+            await asyncio.sleep(self.poke_interval)
+
+    @sync_to_async
+    def _get_count(self, states: typing.Iterable[str] | None) -> int:
+        """
+        Get the count of records against dttm filter and states. Async wrapper for _get_count.
+
+        :param states: task or dag states
+        :return The count of records.
+        """
+        return _get_count(
+            dttm_filter=self.logical_dates if AIRFLOW_V_3_0_PLUS else self.execution_dates,
+            external_task_ids=self.external_task_ids,
+            external_task_group_id=self.external_task_group_id,
+            external_dag_id=self.external_dag_id,
+            states=states,
+        )
+
+
+class DagStateTrigger(BaseTrigger):
+    """
+    Waits asynchronously for a DAG to complete for a specific logical date.
+
+    :param dag_id: The dag_id that contains the task you want to wait for
+    :param states: allowed states, default is ``['success']``
+    :param logical_dates: The logical date at which DAG run.
+    :param poll_interval: The time interval in seconds to check the state.
+        The default value is 5.0 sec.
+    """
+
+    def __init__(
+        self,
+        dag_id: str,
+        states: list[DagRunState],
+        logical_dates: list[datetime] | None = None,
+        execution_dates: list[datetime] | None = None,
+        poll_interval: float = 5.0,
+    ):
+        super().__init__()
+        self.dag_id = dag_id
+        self.states = states
+        self.logical_dates = logical_dates
+        self.execution_dates = execution_dates
+        self.poll_interval = poll_interval
+
+    def serialize(self) -> tuple[str, dict[str, typing.Any]]:
+        """Serialize DagStateTrigger arguments and classpath."""
+        _dates = (
+            {"logical_dates": self.logical_dates}
+            if AIRFLOW_V_3_0_PLUS
+            else {"execution_dates": self.execution_dates}
+        )
+        return (
+            "airflow.providers.standard.triggers.external_task.DagStateTrigger",
+            {
+                "dag_id": self.dag_id,
+                "states": self.states,
+                **_dates,
+                "poll_interval": self.poll_interval,
+            },
+        )
+
+    async def run(self) -> typing.AsyncIterator[TriggerEvent]:
+        """Check periodically if the dag run exists, and has hit one of the states yet, or not."""
+        while True:
+            # mypy confuses typing here
+            num_dags = await self.count_dags()  # type: ignore[call-arg]
+            _dates = self.logical_dates if AIRFLOW_V_3_0_PLUS else self.execution_dates
+            if num_dags == len(_dates):  # type: ignore[arg-type]
+                yield TriggerEvent(self.serialize())
+                return
+            await asyncio.sleep(self.poll_interval)
+
+    @sync_to_async
+    @provide_session
+    def count_dags(self, *, session: Session = NEW_SESSION) -> int | None:
+        """Count how many dag runs in the database match our criteria."""
+        _dag_run_date_condition = (
+            DagRun.logical_date.in_(self.logical_dates)
+            if AIRFLOW_V_3_0_PLUS
+            else DagRun.execution_date.in_(self.execution_dates)
+        )
+        count = (
+            session.query(func.count("*"))  # .count() is inefficient
+            .filter(
+                DagRun.dag_id == self.dag_id,
+                DagRun.state.in_(self.states),
+                _dag_run_date_condition,
+            )
+            .scalar()
+        )
+        return typing.cast(int, count)
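
The two triggers above are the deferral targets for the provider's external-task sensing. As a rough illustration (not taken from this package), a task can hand itself off to DagStateTrigger like this; the DAG id, dates, states and class name are placeholder values:

    # Illustrative sketch only -- not part of the diff. Shows a task deferring onto
    # the new DagStateTrigger; dag_id, dates and states are placeholder values.
    from datetime import datetime, timezone

    from airflow.models.baseoperator import BaseOperator
    from airflow.providers.standard.triggers.external_task import DagStateTrigger
    from airflow.utils.state import DagRunState


    class WaitForOtherDag(BaseOperator):
        def execute(self, context):
            self.defer(
                trigger=DagStateTrigger(
                    dag_id="other_dag",
                    states=[DagRunState.SUCCESS, DagRunState.FAILED],
                    # On Airflow < 3.0 pass execution_dates instead of logical_dates.
                    logical_dates=[datetime(2025, 1, 1, tzinfo=timezone.utc)],
                    poll_interval=30.0,
                ),
                method_name="execute_complete",
            )

        def execute_complete(self, context, event=None):
            # DagStateTrigger yields its own serialized form as the event payload.
            self.log.info("Watched DAG run reached a terminal state: %s", event)

WorkflowTrigger is deferred onto in the same way when individual task ids or a task group, rather than a whole DAG run, are being watched.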
airflow/providers/standard/triggers/file.py
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import asyncio
+import datetime
+import os
+import typing
+from glob import glob
+from typing import Any
+
+from airflow.triggers.base import BaseTrigger, TriggerEvent
+
+
+class FileTrigger(BaseTrigger):
+    """
+    A trigger that fires exactly once after it finds the requested file or folder.
+
+    :param filepath: File or folder name (relative to the base path set within the connection), can
+        be a glob.
+    :param recursive: when set to ``True``, enables recursive directory matching behavior of
+        ``**`` in glob filepath parameter. Defaults to ``False``.
+    :param poke_interval: Time that the job should wait in between each try
+    """
+
+    def __init__(
+        self,
+        filepath: str,
+        recursive: bool = False,
+        poke_interval: float = 5.0,
+        **kwargs,
+    ):
+        super().__init__()
+        self.filepath = filepath
+        self.recursive = recursive
+        self.poke_interval = poke_interval
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        """Serialize FileTrigger arguments and classpath."""
+        return (
+            "airflow.providers.standard.triggers.file.FileTrigger",
+            {
+                "filepath": self.filepath,
+                "recursive": self.recursive,
+                "poke_interval": self.poke_interval,
+            },
+        )
+
+    async def run(self) -> typing.AsyncIterator[TriggerEvent]:
+        """Loop until the relevant files are found."""
+        while True:
+            for path in glob(self.filepath, recursive=self.recursive):
+                if os.path.isfile(path):
+                    mod_time_f = os.path.getmtime(path)
+                    mod_time = datetime.datetime.fromtimestamp(mod_time_f).strftime("%Y%m%d%H%M%S")
+                    self.log.info("Found File %s last modified: %s", path, mod_time)
+                    yield TriggerEvent(True)
+                    return
+            for _, _, files in os.walk(self.filepath):
+                if files:
+                    yield TriggerEvent(True)
+                    return
+            await asyncio.sleep(self.poke_interval)
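
A minimal sketch of how a task might defer onto FileTrigger; the glob path, interval and class name are illustrative, not taken from the provider:

    # Illustrative sketch only -- not part of the diff. Defers until a file matching
    # the glob appears; the path and interval are example values.
    from airflow.models.baseoperator import BaseOperator
    from airflow.providers.standard.triggers.file import FileTrigger


    class WaitForFile(BaseOperator):
        def execute(self, context):
            self.defer(
                trigger=FileTrigger(
                    filepath="/data/incoming/*.csv",  # can be a plain path or a glob
                    recursive=False,
                    poke_interval=30.0,
                ),
                method_name="execute_complete",
            )

        def execute_complete(self, context, event=None):
            # FileTrigger fires a single TriggerEvent(True) once something matches.
            self.log.info("File condition met: %s", event)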
airflow/providers/standard/triggers/temporal.py
@@ -0,0 +1,114 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import asyncio
+import datetime
+from collections.abc import AsyncIterator
+from typing import Any
+
+import pendulum
+
+from airflow.exceptions import AirflowException
+from airflow.providers.standard.version_compat import AIRFLOW_V_2_10_PLUS
+from airflow.triggers.base import BaseTrigger, TriggerEvent
+from airflow.utils import timezone
+
+if AIRFLOW_V_2_10_PLUS:
+    from airflow.triggers.base import TaskSuccessEvent
+
+
+class DateTimeTrigger(BaseTrigger):
+    """
+    Trigger based on a datetime.
+
+    A trigger that fires exactly once, at the given datetime, give or take
+    a few seconds.
+
+    The provided datetime MUST be in UTC.
+
+    :param moment: when to yield event
+    :param end_from_trigger: whether the trigger should mark the task successful after time condition
+        reached or resume the task after time condition reached.
+    """
+
+    def __init__(self, moment: datetime.datetime, *, end_from_trigger: bool = False) -> None:
+        super().__init__()
+        if not isinstance(moment, datetime.datetime):
+            raise TypeError(f"Expected datetime.datetime type for moment. Got {type(moment)}")
+        # Make sure it's in UTC
+        elif moment.tzinfo is None:
+            raise ValueError("You cannot pass naive datetimes")
+        else:
+            self.moment: pendulum.DateTime = timezone.convert_to_utc(moment)
+        if not AIRFLOW_V_2_10_PLUS and end_from_trigger:
+            raise AirflowException("end_from_trigger is only supported in Airflow 2.10 and later. ")
+
+        self.end_from_trigger = end_from_trigger
+
+    def serialize(self) -> tuple[str, dict[str, Any]]:
+        return (
+            "airflow.providers.standard.triggers.temporal.DateTimeTrigger",
+            {"moment": self.moment, "end_from_trigger": self.end_from_trigger},
+        )
+
+    async def run(self) -> AsyncIterator[TriggerEvent]:
+        """
+        Loop until the relevant time is met.
+
+        We do have a two-phase delay to save some cycles, but sleeping is so
+        cheap anyway that it's pretty loose. We also don't just sleep for
+        "the number of seconds until the time" in case the system clock changes
+        unexpectedly, or handles a DST change poorly.
+        """
+        # Sleep in successively smaller increments starting from 1 hour down to 10 seconds at a time
+        self.log.info("trigger starting")
+        for step in 3600, 60, 10:
+            seconds_remaining = (self.moment - pendulum.instance(timezone.utcnow())).total_seconds()
+            while seconds_remaining > 2 * step:
+                self.log.info("%d seconds remaining; sleeping %s seconds", seconds_remaining, step)
+                await asyncio.sleep(step)
+                seconds_remaining = (self.moment - pendulum.instance(timezone.utcnow())).total_seconds()
+        # Sleep a second at a time otherwise
+        while self.moment > pendulum.instance(timezone.utcnow()):
+            self.log.info("sleeping 1 second...")
+            await asyncio.sleep(1)
+        if self.end_from_trigger:
+            self.log.info("Sensor time condition reached; marking task successful and exiting")
+            yield TaskSuccessEvent()
+        else:
+            self.log.info("yielding event with payload %r", self.moment)
+            yield TriggerEvent(self.moment)
+
+
+class TimeDeltaTrigger(DateTimeTrigger):
+    """
+    Create DateTimeTriggers based on delays.
+
+    Subclass to create DateTimeTriggers based on time delays rather
+    than exact moments.
+
+    While this is its own distinct class here, it will serialise to a
+    DateTimeTrigger class, since they're operationally the same.
+
+    :param delta: how long to wait
+    :param end_from_trigger: whether the trigger should mark the task successful after time condition
+        reached or resume the task after time condition reached.
+    """
+
+    def __init__(self, delta: datetime.timedelta, *, end_from_trigger: bool = False) -> None:
+        super().__init__(moment=timezone.utcnow() + delta, end_from_trigger=end_from_trigger)
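
For the temporal triggers, the usual pattern is to defer for a fixed delay with TimeDeltaTrigger and resume in a callback; with end_from_trigger=True on Airflow 2.10+ the trigger can instead mark the task successful directly, as the code above shows. A hedged sketch with illustrative names:

    # Illustrative sketch only -- not part of the diff. Frees the worker slot for
    # ten minutes by deferring onto TimeDeltaTrigger, then resumes.
    from datetime import timedelta

    from airflow.models.baseoperator import BaseOperator
    from airflow.providers.standard.triggers.temporal import TimeDeltaTrigger


    class PauseTenMinutes(BaseOperator):
        def execute(self, context):
            self.defer(
                trigger=TimeDeltaTrigger(timedelta(minutes=10)),
                method_name="execute_complete",
            )

        def execute_complete(self, context, event=None):
            self.log.info("Resumed after the delay; payload: %s", event)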
airflow/providers/standard/utils/python_virtualenv_script.jinja2
@@ -70,7 +70,6 @@ if len(sys.argv) > 5:
     from types import ModuleType
 
     from airflow.providers.standard.operators import python as airflow_python
-    from airflow.serialization.serialized_objects import BaseSerialization
 
 
     class _MockPython(ModuleType):
@@ -78,7 +77,8 @@ if len(sys.argv) > 5:
         def get_current_context():
            with open(sys.argv[5]) as file:
                context = json.load(file)
-
+            raise Exception("Not yet implemented")
+            # TODO: return deserialized context
 
         def __getattr__(self, name: str):
             return getattr(airflow_python, name)
airflow/providers/standard/utils/sensor_helper.py
@@ -0,0 +1,123 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, cast
+
+from sqlalchemy import func, select
+
+from airflow.models import DagBag, DagRun, TaskInstance
+from airflow.utils.session import NEW_SESSION, provide_session
+from airflow.utils.sqlalchemy import tuple_in_condition
+
+if TYPE_CHECKING:
+    from sqlalchemy.orm import Query, Session
+
+
+@provide_session
+def _get_count(
+    dttm_filter,
+    external_task_ids,
+    external_task_group_id,
+    external_dag_id,
+    states,
+    session: Session = NEW_SESSION,
+) -> int:
+    """
+    Get the count of records against dttm filter and states.
+
+    :param dttm_filter: date time filter for logical date
+    :param external_task_ids: The list of task_ids
+    :param external_task_group_id: The ID of the external task group
+    :param external_dag_id: The ID of the external DAG.
+    :param states: task or dag states
+    :param session: airflow session object
+    """
+    TI = TaskInstance
+    DR = DagRun
+    if not dttm_filter:
+        return 0
+
+    if external_task_ids:
+        count = (
+            session.scalar(
+                _count_query(TI, states, dttm_filter, external_dag_id, session).filter(
+                    TI.task_id.in_(external_task_ids)
+                )
+            )
+        ) / len(external_task_ids)
+    elif external_task_group_id:
+        external_task_group_task_ids = _get_external_task_group_task_ids(
+            dttm_filter, external_task_group_id, external_dag_id, session
+        )
+        if not external_task_group_task_ids:
+            count = 0
+        else:
+            count = (
+                session.scalar(
+                    _count_query(TI, states, dttm_filter, external_dag_id, session).filter(
+                        tuple_in_condition((TI.task_id, TI.map_index), external_task_group_task_ids)
+                    )
+                )
+            ) / len(external_task_group_task_ids)
+    else:
+        count = session.scalar(_count_query(DR, states, dttm_filter, external_dag_id, session))
+    return cast(int, count)
+
+
+def _count_query(model, states, dttm_filter, external_dag_id, session: Session) -> Query:
+    """
+    Get the count of records against dttm filter and states.
+
+    :param model: The SQLAlchemy model representing the relevant table.
+    :param states: task or dag states
+    :param dttm_filter: date time filter for logical date
+    :param external_dag_id: The ID of the external DAG.
+    :param session: airflow session object
+    """
+    query = select(func.count()).filter(
+        model.dag_id == external_dag_id, model.state.in_(states), model.logical_date.in_(dttm_filter)
+    )
+    return query
+
+
+def _get_external_task_group_task_ids(dttm_filter, external_task_group_id, external_dag_id, session):
+    """
+    Get the count of records against dttm filter and states.
+
+    :param dttm_filter: date time filter for logical date
+    :param external_task_group_id: The ID of the external task group
+    :param external_dag_id: The ID of the external DAG.
+    :param session: airflow session object
+    """
+    refreshed_dag_info = DagBag(read_dags_from_db=True).get_dag(external_dag_id, session)
+    task_group = refreshed_dag_info.task_group_dict.get(external_task_group_id)
+
+    if task_group:
+        group_tasks = session.scalars(
+            select(TaskInstance).filter(
+                TaskInstance.dag_id == external_dag_id,
+                TaskInstance.task_id.in_(task.task_id for task in task_group),
+                TaskInstance.logical_date.in_(dttm_filter),
+            )
+        )
+
+        return [(t.task_id, t.map_index) for t in group_tasks]
+
+    # returning default task_id as group_id itself, this will avoid any failure in case of
+    # 'check_existence=False' and will fail on timeout
+    return [(external_task_group_id, -1)]
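
A sketch of calling the helper directly, assuming a reachable Airflow metadata database (all values are examples). With task ids given, _get_count returns the matching task-instance count divided by the number of task ids, so it equals the number of dates for which every listed task has reached one of the given states:

    # Illustrative sketch only -- not part of the diff. Requires a configured
    # Airflow metadata database; all values are examples.
    from datetime import datetime, timezone

    from airflow.providers.standard.utils.sensor_helper import _get_count
    from airflow.utils.state import TaskInstanceState

    done = _get_count(
        dttm_filter=[datetime(2025, 1, 1, tzinfo=timezone.utc)],
        external_task_ids=["extract", "load"],
        external_task_group_id=None,
        external_dag_id="etl_dag",
        states=[TaskInstanceState.SUCCESS],
    )
    # Matching task-instance count divided by the number of task ids.
    print(done)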
airflow/providers/standard/version_compat.py
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# NOTE! THIS FILE IS COPIED MANUALLY IN OTHER PROVIDERS DELIBERATELY TO AVOID ADDING UNNECESSARY
+# DEPENDENCIES BETWEEN PROVIDERS. IF YOU WANT TO ADD CONDITIONAL CODE IN YOUR PROVIDER THAT DEPENDS
+# ON AIRFLOW VERSION, PLEASE COPY THIS FILE TO THE ROOT PACKAGE OF YOUR PROVIDER AND IMPORT
+# THOSE CONSTANTS FROM IT RATHER THAN IMPORTING THEM FROM ANOTHER PROVIDER OR TEST CODE
+#
+from __future__ import annotations
+
+
+def get_base_airflow_version_tuple() -> tuple[int, int, int]:
+    from packaging.version import Version
+
+    from airflow import __version__
+
+    airflow_version = Version(__version__)
+    return airflow_version.major, airflow_version.minor, airflow_version.micro
+
+
+AIRFLOW_V_2_10_PLUS = get_base_airflow_version_tuple() >= (2, 10, 0)
+AIRFLOW_V_3_0_PLUS = get_base_airflow_version_tuple() >= (3, 0, 0)
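
These constants gate version-specific imports and argument names. A small sketch of the pattern, mirroring how the trigger modules in this diff use them:

    # Illustrative sketch only -- not part of the diff.
    from airflow.providers.standard.version_compat import AIRFLOW_V_2_10_PLUS, AIRFLOW_V_3_0_PLUS

    if AIRFLOW_V_2_10_PLUS:
        # TaskSuccessEvent only exists from Airflow 2.10 onwards.
        from airflow.triggers.base import TaskSuccessEvent  # noqa: F401

    # Airflow 3.0 renamed execution_date to logical_date, hence the dual kwargs above.
    date_kwarg = "logical_dates" if AIRFLOW_V_3_0_PLUS else "execution_dates"
    print(date_kwarg)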
{apache_airflow_providers_standard-0.0.2rc1.dist-info → apache_airflow_providers_standard-0.0.3.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: apache-airflow-providers-standard
-Version: 0.0.
+Version: 0.0.3
 Summary: Provider package apache-airflow-providers-standard for Apache Airflow
 Keywords: airflow-provider,standard,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -20,14 +20,14 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: System :: Monitoring
-Requires-Dist: apache-airflow-providers-common-sql>=1.20.
-Requires-Dist: apache-airflow>=2.
+Requires-Dist: apache-airflow-providers-common-sql>=1.20.0
+Requires-Dist: apache-airflow>=2.9.0
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-standard/0.0.
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-standard/0.0.
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-standard/0.0.3/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-standard/0.0.3
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
-Project-URL: Twitter, https://
+Project-URL: Twitter, https://x.com/ApacheAirflow
 Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
 
 
@@ -74,7 +74,7 @@ Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
 
 Package ``apache-airflow-providers-standard``
 
-Release: ``0.0.
+Release: ``0.0.3``
 
 
 Airflow Standard Provider
@@ -87,7 +87,7 @@ This is a provider package for ``standard`` provider. All classes for this provi
 are in ``airflow.providers.standard`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-standard/0.0.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-standard/0.0.3/>`_.
 
 Installation
 ------------
@@ -104,9 +104,9 @@ Requirements
 ======================================= ==================
 PIP package                             Version required
 ======================================= ==================
-``apache-airflow``                      ``>=2.
+``apache-airflow``                      ``>=2.9.0``
 ``apache-airflow-providers-common-sql`` ``>=1.20.0``
 ======================================= ==================
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-standard/0.0.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-standard/0.0.3/changelog.html>`_.