apache-airflow-providers-standard 0.0.3rc2__py3-none-any.whl → 1.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apache-airflow-providers-standard might be problematic. More details are available on the original diff page.

Files changed (35)
  1. airflow/providers/standard/__init__.py +1 -23
  2. airflow/providers/standard/get_provider_info.py +5 -52
  3. airflow/providers/standard/operators/datetime.py +2 -3
  4. airflow/providers/standard/operators/weekday.py +1 -4
  5. airflow/providers/standard/sensors/date_time.py +7 -27
  6. airflow/providers/standard/sensors/time.py +4 -23
  7. airflow/providers/standard/sensors/time_delta.py +4 -29
  8. airflow/providers/standard/sensors/weekday.py +1 -2
  9. {apache_airflow_providers_standard-0.0.3rc2.dist-info → apache_airflow_providers_standard-1.0.0.dev0.dist-info}/METADATA +17 -18
  10. apache_airflow_providers_standard-1.0.0.dev0.dist-info/RECORD +15 -0
  11. {apache_airflow_providers_standard-0.0.3rc2.dist-info → apache_airflow_providers_standard-1.0.0.dev0.dist-info}/WHEEL +1 -1
  12. airflow/providers/standard/hooks/__init__.py +0 -16
  13. airflow/providers/standard/hooks/filesystem.py +0 -89
  14. airflow/providers/standard/hooks/package_index.py +0 -95
  15. airflow/providers/standard/hooks/subprocess.py +0 -119
  16. airflow/providers/standard/operators/bash.py +0 -312
  17. airflow/providers/standard/operators/generic_transfer.py +0 -134
  18. airflow/providers/standard/operators/latest_only.py +0 -78
  19. airflow/providers/standard/operators/python.py +0 -1155
  20. airflow/providers/standard/operators/trigger_dagrun.py +0 -296
  21. airflow/providers/standard/sensors/bash.py +0 -116
  22. airflow/providers/standard/sensors/external_task.py +0 -512
  23. airflow/providers/standard/sensors/filesystem.py +0 -154
  24. airflow/providers/standard/sensors/python.py +0 -81
  25. airflow/providers/standard/triggers/__init__.py +0 -16
  26. airflow/providers/standard/triggers/external_task.py +0 -216
  27. airflow/providers/standard/triggers/file.py +0 -77
  28. airflow/providers/standard/triggers/temporal.py +0 -114
  29. airflow/providers/standard/utils/__init__.py +0 -16
  30. airflow/providers/standard/utils/python_virtualenv.py +0 -209
  31. airflow/providers/standard/utils/python_virtualenv_script.jinja2 +0 -101
  32. airflow/providers/standard/utils/sensor_helper.py +0 -123
  33. airflow/providers/standard/version_compat.py +0 -36
  34. apache_airflow_providers_standard-0.0.3rc2.dist-info/RECORD +0 -37
  35. {apache_airflow_providers_standard-0.0.3rc2.dist-info → apache_airflow_providers_standard-1.0.0.dev0.dist-info}/entry_points.txt +0 -0
@@ -1,81 +0,0 @@ airflow/providers/standard/sensors/python.py
1
- #
2
- # Licensed to the Apache Software Foundation (ASF) under one
3
- # or more contributor license agreements. See the NOTICE file
4
- # distributed with this work for additional information
5
- # regarding copyright ownership. The ASF licenses this file
6
- # to you under the Apache License, Version 2.0 (the
7
- # "License"); you may not use this file except in compliance
8
- # with the License. You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing,
13
- # software distributed under the License is distributed on an
14
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- # KIND, either express or implied. See the License for the
16
- # specific language governing permissions and limitations
17
- # under the License.
18
- from __future__ import annotations
19
-
20
- from collections.abc import Mapping, Sequence
21
- from typing import TYPE_CHECKING, Any, Callable
22
-
23
- from airflow.sensors.base import BaseSensorOperator, PokeReturnValue
24
- from airflow.utils.context import context_merge
25
- from airflow.utils.operator_helpers import determine_kwargs
26
-
27
- if TYPE_CHECKING:
28
- from airflow.utils.context import Context
29
-
30
-
31
- class PythonSensor(BaseSensorOperator):
32
- """
33
- Waits for a Python callable to return True.
34
-
35
- User could put input argument in templates_dict
36
- e.g ``templates_dict = {'start_ds': 1970}``
37
- and access the argument by calling ``kwargs['templates_dict']['start_ds']``
38
- in the callable
39
-
40
- :param python_callable: A reference to an object that is callable
41
- :param op_kwargs: a dictionary of keyword arguments that will get unpacked
42
- in your function
43
- :param op_args: a list of positional arguments that will get unpacked when
44
- calling your callable
45
- :param templates_dict: a dictionary where the values are templates that
46
- will get templated by the Airflow engine sometime between
47
- ``__init__`` and ``execute`` takes place and are made available
48
- in your callable's context after the template has been applied.
49
-
50
- .. seealso::
51
- For more information on how to use this sensor, take a look at the guide:
52
- :ref:`howto/operator:PythonSensor`
53
- """
54
-
55
- template_fields: Sequence[str] = ("templates_dict", "op_args", "op_kwargs")
56
-
57
- def __init__(
58
- self,
59
- *,
60
- python_callable: Callable,
61
- op_args: list | None = None,
62
- op_kwargs: Mapping[str, Any] | None = None,
63
- templates_dict: dict | None = None,
64
- **kwargs,
65
- ):
66
- super().__init__(**kwargs)
67
- self.python_callable = python_callable
68
- self.op_args = op_args or []
69
- self.op_kwargs = op_kwargs or {}
70
- self.templates_dict = templates_dict
71
-
72
- def poke(self, context: Context) -> PokeReturnValue | bool:
73
- context_merge(context, self.op_kwargs, templates_dict=self.templates_dict)
74
- self.op_kwargs = determine_kwargs(self.python_callable, self.op_args, context)
75
-
76
- self.log.info("Poking callable: %s", str(self.python_callable))
77
- return_value = self.python_callable(*self.op_args, **self.op_kwargs)
78
- if isinstance(return_value, PokeReturnValue):
79
- return return_value
80
- else:
81
- return PokeReturnValue(bool(return_value))
@@ -1,16 +0,0 @@ airflow/providers/standard/triggers/__init__.py
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
@@ -1,216 +0,0 @@ airflow/providers/standard/triggers/external_task.py
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
- from __future__ import annotations
18
-
19
- import asyncio
20
- import typing
21
- from typing import Any
22
-
23
- from asgiref.sync import sync_to_async
24
- from sqlalchemy import func
25
-
26
- from airflow.models import DagRun
27
- from airflow.providers.standard.utils.sensor_helper import _get_count
28
- from airflow.providers.standard.version_compat import AIRFLOW_V_3_0_PLUS
29
- from airflow.triggers.base import BaseTrigger, TriggerEvent
30
- from airflow.utils.session import NEW_SESSION, provide_session
31
-
32
- if typing.TYPE_CHECKING:
33
- from datetime import datetime
34
-
35
- from sqlalchemy.orm import Session
36
-
37
- from airflow.utils.state import DagRunState
38
-
39
-
40
- class WorkflowTrigger(BaseTrigger):
41
- """
42
- A trigger to monitor tasks, task group and dag execution in Apache Airflow.
43
-
44
- :param external_dag_id: The ID of the external DAG.
45
- :param logical_dates: A list of logical dates for the external DAG.
46
- :param external_task_ids: A collection of external task IDs to wait for.
47
- :param external_task_group_id: The ID of the external task group to wait for.
48
- :param failed_states: States considered as failed for external tasks.
49
- :param skipped_states: States considered as skipped for external tasks.
50
- :param allowed_states: States considered as successful for external tasks.
51
- :param poke_interval: The interval (in seconds) for poking the external tasks.
52
- :param soft_fail: If True, the trigger will not fail the entire DAG on external task failure.
53
- """
54
-
55
- def __init__(
56
- self,
57
- external_dag_id: str,
58
- logical_dates: list[datetime] | None = None,
59
- execution_dates: list[datetime] | None = None,
60
- external_task_ids: typing.Collection[str] | None = None,
61
- external_task_group_id: str | None = None,
62
- failed_states: typing.Iterable[str] | None = None,
63
- skipped_states: typing.Iterable[str] | None = None,
64
- allowed_states: typing.Iterable[str] | None = None,
65
- poke_interval: float = 2.0,
66
- soft_fail: bool = False,
67
- **kwargs,
68
- ):
69
- self.external_dag_id = external_dag_id
70
- self.external_task_ids = external_task_ids
71
- self.external_task_group_id = external_task_group_id
72
- self.failed_states = failed_states
73
- self.skipped_states = skipped_states
74
- self.allowed_states = allowed_states
75
- self.logical_dates = logical_dates
76
- self.poke_interval = poke_interval
77
- self.soft_fail = soft_fail
78
- self.execution_dates = execution_dates
79
- super().__init__(**kwargs)
80
-
81
- def serialize(self) -> tuple[str, dict[str, Any]]:
82
- """Serialize the trigger param and module path."""
83
- _dates = (
84
- {"logical_dates": self.logical_dates}
85
- if AIRFLOW_V_3_0_PLUS
86
- else {"execution_dates": self.execution_dates}
87
- )
88
- return (
89
- "airflow.providers.standard.triggers.external_task.WorkflowTrigger",
90
- {
91
- "external_dag_id": self.external_dag_id,
92
- "external_task_ids": self.external_task_ids,
93
- "external_task_group_id": self.external_task_group_id,
94
- "failed_states": self.failed_states,
95
- "skipped_states": self.skipped_states,
96
- "allowed_states": self.allowed_states,
97
- **_dates,
98
- "poke_interval": self.poke_interval,
99
- "soft_fail": self.soft_fail,
100
- },
101
- )
102
-
103
- async def run(self) -> typing.AsyncIterator[TriggerEvent]:
104
- """Check periodically tasks, task group or dag status."""
105
- while True:
106
- if self.failed_states:
107
- failed_count = await self._get_count(self.failed_states)
108
- if failed_count > 0:
109
- yield TriggerEvent({"status": "failed"})
110
- return
111
- else:
112
- yield TriggerEvent({"status": "success"})
113
- return
114
- if self.skipped_states:
115
- skipped_count = await self._get_count(self.skipped_states)
116
- if skipped_count > 0:
117
- yield TriggerEvent({"status": "skipped"})
118
- return
119
- allowed_count = await self._get_count(self.allowed_states)
120
- _dates = self.logical_dates if AIRFLOW_V_3_0_PLUS else self.execution_dates
121
- if allowed_count == len(_dates): # type: ignore[arg-type]
122
- yield TriggerEvent({"status": "success"})
123
- return
124
- self.log.info("Sleeping for %s seconds", self.poke_interval)
125
- await asyncio.sleep(self.poke_interval)
126
-
127
- @sync_to_async
128
- def _get_count(self, states: typing.Iterable[str] | None) -> int:
129
- """
130
- Get the count of records against dttm filter and states. Async wrapper for _get_count.
131
-
132
- :param states: task or dag states
133
- :return The count of records.
134
- """
135
- return _get_count(
136
- dttm_filter=self.logical_dates if AIRFLOW_V_3_0_PLUS else self.execution_dates,
137
- external_task_ids=self.external_task_ids,
138
- external_task_group_id=self.external_task_group_id,
139
- external_dag_id=self.external_dag_id,
140
- states=states,
141
- )
142
-
143
-
144
- class DagStateTrigger(BaseTrigger):
145
- """
146
- Waits asynchronously for a DAG to complete for a specific logical date.
147
-
148
- :param dag_id: The dag_id that contains the task you want to wait for
149
- :param states: allowed states, default is ``['success']``
150
- :param logical_dates: The logical date at which DAG run.
151
- :param poll_interval: The time interval in seconds to check the state.
152
- The default value is 5.0 sec.
153
- """
154
-
155
- def __init__(
156
- self,
157
- dag_id: str,
158
- states: list[DagRunState],
159
- logical_dates: list[datetime] | None = None,
160
- execution_dates: list[datetime] | None = None,
161
- poll_interval: float = 5.0,
162
- ):
163
- super().__init__()
164
- self.dag_id = dag_id
165
- self.states = states
166
- self.logical_dates = logical_dates
167
- self.execution_dates = execution_dates
168
- self.poll_interval = poll_interval
169
-
170
- def serialize(self) -> tuple[str, dict[str, typing.Any]]:
171
- """Serialize DagStateTrigger arguments and classpath."""
172
- _dates = (
173
- {"logical_dates": self.logical_dates}
174
- if AIRFLOW_V_3_0_PLUS
175
- else {"execution_dates": self.execution_dates}
176
- )
177
- return (
178
- "airflow.providers.standard.triggers.external_task.DagStateTrigger",
179
- {
180
- "dag_id": self.dag_id,
181
- "states": self.states,
182
- **_dates,
183
- "poll_interval": self.poll_interval,
184
- },
185
- )
186
-
187
- async def run(self) -> typing.AsyncIterator[TriggerEvent]:
188
- """Check periodically if the dag run exists, and has hit one of the states yet, or not."""
189
- while True:
190
- # mypy confuses typing here
191
- num_dags = await self.count_dags() # type: ignore[call-arg]
192
- _dates = self.logical_dates if AIRFLOW_V_3_0_PLUS else self.execution_dates
193
- if num_dags == len(_dates): # type: ignore[arg-type]
194
- yield TriggerEvent(self.serialize())
195
- return
196
- await asyncio.sleep(self.poll_interval)
197
-
198
- @sync_to_async
199
- @provide_session
200
- def count_dags(self, *, session: Session = NEW_SESSION) -> int | None:
201
- """Count how many dag runs in the database match our criteria."""
202
- _dag_run_date_condition = (
203
- DagRun.logical_date.in_(self.logical_dates)
204
- if AIRFLOW_V_3_0_PLUS
205
- else DagRun.execution_date.in_(self.execution_dates)
206
- )
207
- count = (
208
- session.query(func.count("*")) # .count() is inefficient
209
- .filter(
210
- DagRun.dag_id == self.dag_id,
211
- DagRun.state.in_(self.states),
212
- _dag_run_date_condition,
213
- )
214
- .scalar()
215
- )
216
- return typing.cast(int, count)
@@ -1,77 +0,0 @@ airflow/providers/standard/triggers/file.py
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
- from __future__ import annotations
18
-
19
- import asyncio
20
- import datetime
21
- import os
22
- import typing
23
- from glob import glob
24
- from typing import Any
25
-
26
- from airflow.triggers.base import BaseTrigger, TriggerEvent
27
-
28
-
29
- class FileTrigger(BaseTrigger):
30
- """
31
- A trigger that fires exactly once after it finds the requested file or folder.
32
-
33
- :param filepath: File or folder name (relative to the base path set within the connection), can
34
- be a glob.
35
- :param recursive: when set to ``True``, enables recursive directory matching behavior of
36
- ``**`` in glob filepath parameter. Defaults to ``False``.
37
- :param poke_interval: Time that the job should wait in between each try
38
- """
39
-
40
- def __init__(
41
- self,
42
- filepath: str,
43
- recursive: bool = False,
44
- poke_interval: float = 5.0,
45
- **kwargs,
46
- ):
47
- super().__init__()
48
- self.filepath = filepath
49
- self.recursive = recursive
50
- self.poke_interval = poke_interval
51
-
52
- def serialize(self) -> tuple[str, dict[str, Any]]:
53
- """Serialize FileTrigger arguments and classpath."""
54
- return (
55
- "airflow.providers.standard.triggers.file.FileTrigger",
56
- {
57
- "filepath": self.filepath,
58
- "recursive": self.recursive,
59
- "poke_interval": self.poke_interval,
60
- },
61
- )
62
-
63
- async def run(self) -> typing.AsyncIterator[TriggerEvent]:
64
- """Loop until the relevant files are found."""
65
- while True:
66
- for path in glob(self.filepath, recursive=self.recursive):
67
- if os.path.isfile(path):
68
- mod_time_f = os.path.getmtime(path)
69
- mod_time = datetime.datetime.fromtimestamp(mod_time_f).strftime("%Y%m%d%H%M%S")
70
- self.log.info("Found File %s last modified: %s", path, mod_time)
71
- yield TriggerEvent(True)
72
- return
73
- for _, _, files in os.walk(self.filepath):
74
- if files:
75
- yield TriggerEvent(True)
76
- return
77
- await asyncio.sleep(self.poke_interval)
@@ -1,114 +0,0 @@ airflow/providers/standard/triggers/temporal.py
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
- from __future__ import annotations
18
-
19
- import asyncio
20
- import datetime
21
- from collections.abc import AsyncIterator
22
- from typing import Any
23
-
24
- import pendulum
25
-
26
- from airflow.exceptions import AirflowException
27
- from airflow.providers.standard.version_compat import AIRFLOW_V_2_10_PLUS
28
- from airflow.triggers.base import BaseTrigger, TriggerEvent
29
- from airflow.utils import timezone
30
-
31
- if AIRFLOW_V_2_10_PLUS:
32
- from airflow.triggers.base import TaskSuccessEvent
33
-
34
-
35
- class DateTimeTrigger(BaseTrigger):
36
- """
37
- Trigger based on a datetime.
38
-
39
- A trigger that fires exactly once, at the given datetime, give or take
40
- a few seconds.
41
-
42
- The provided datetime MUST be in UTC.
43
-
44
- :param moment: when to yield event
45
- :param end_from_trigger: whether the trigger should mark the task successful after time condition
46
- reached or resume the task after time condition reached.
47
- """
48
-
49
- def __init__(self, moment: datetime.datetime, *, end_from_trigger: bool = False) -> None:
50
- super().__init__()
51
- if not isinstance(moment, datetime.datetime):
52
- raise TypeError(f"Expected datetime.datetime type for moment. Got {type(moment)}")
53
- # Make sure it's in UTC
54
- elif moment.tzinfo is None:
55
- raise ValueError("You cannot pass naive datetimes")
56
- else:
57
- self.moment: pendulum.DateTime = timezone.convert_to_utc(moment)
58
- if not AIRFLOW_V_2_10_PLUS and end_from_trigger:
59
- raise AirflowException("end_from_trigger is only supported in Airflow 2.10 and later. ")
60
-
61
- self.end_from_trigger = end_from_trigger
62
-
63
- def serialize(self) -> tuple[str, dict[str, Any]]:
64
- return (
65
- "airflow.providers.standard.triggers.temporal.DateTimeTrigger",
66
- {"moment": self.moment, "end_from_trigger": self.end_from_trigger},
67
- )
68
-
69
- async def run(self) -> AsyncIterator[TriggerEvent]:
70
- """
71
- Loop until the relevant time is met.
72
-
73
- We do have a two-phase delay to save some cycles, but sleeping is so
74
- cheap anyway that it's pretty loose. We also don't just sleep for
75
- "the number of seconds until the time" in case the system clock changes
76
- unexpectedly, or handles a DST change poorly.
77
- """
78
- # Sleep in successively smaller increments starting from 1 hour down to 10 seconds at a time
79
- self.log.info("trigger starting")
80
- for step in 3600, 60, 10:
81
- seconds_remaining = (self.moment - pendulum.instance(timezone.utcnow())).total_seconds()
82
- while seconds_remaining > 2 * step:
83
- self.log.info("%d seconds remaining; sleeping %s seconds", seconds_remaining, step)
84
- await asyncio.sleep(step)
85
- seconds_remaining = (self.moment - pendulum.instance(timezone.utcnow())).total_seconds()
86
- # Sleep a second at a time otherwise
87
- while self.moment > pendulum.instance(timezone.utcnow()):
88
- self.log.info("sleeping 1 second...")
89
- await asyncio.sleep(1)
90
- if self.end_from_trigger:
91
- self.log.info("Sensor time condition reached; marking task successful and exiting")
92
- yield TaskSuccessEvent()
93
- else:
94
- self.log.info("yielding event with payload %r", self.moment)
95
- yield TriggerEvent(self.moment)
96
-
97
-
98
- class TimeDeltaTrigger(DateTimeTrigger):
99
- """
100
- Create DateTimeTriggers based on delays.
101
-
102
- Subclass to create DateTimeTriggers based on time delays rather
103
- than exact moments.
104
-
105
- While this is its own distinct class here, it will serialise to a
106
- DateTimeTrigger class, since they're operationally the same.
107
-
108
- :param delta: how long to wait
109
- :param end_from_trigger: whether the trigger should mark the task successful after time condition
110
- reached or resume the task after time condition reached.
111
- """
112
-
113
- def __init__(self, delta: datetime.timedelta, *, end_from_trigger: bool = False) -> None:
114
- super().__init__(moment=timezone.utcnow() + delta, end_from_trigger=end_from_trigger)
@@ -1,16 +0,0 @@ airflow/providers/standard/utils/__init__.py
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.