apache-airflow-providers-standard 0.1.0rc1__py3-none-any.whl → 1.0.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-standard might be problematic. Click here for more details.
- airflow/providers/standard/LICENSE +52 -0
- airflow/providers/standard/__init__.py +1 -23
- airflow/providers/standard/get_provider_info.py +7 -52
- airflow/providers/standard/operators/bash.py +28 -82
- airflow/providers/standard/operators/datetime.py +3 -8
- airflow/providers/standard/operators/weekday.py +4 -11
- airflow/providers/standard/sensors/bash.py +5 -11
- airflow/providers/standard/sensors/date_time.py +8 -32
- airflow/providers/standard/sensors/time.py +5 -28
- airflow/providers/standard/sensors/time_delta.py +10 -48
- airflow/providers/standard/sensors/weekday.py +2 -7
- {apache_airflow_providers_standard-0.1.0rc1.dist-info → apache_airflow_providers_standard-1.0.0.dev1.dist-info}/METADATA +36 -20
- apache_airflow_providers_standard-1.0.0.dev1.dist-info/RECORD +17 -0
- {apache_airflow_providers_standard-0.1.0rc1.dist-info → apache_airflow_providers_standard-1.0.0.dev1.dist-info}/WHEEL +1 -1
- airflow/providers/standard/hooks/__init__.py +0 -16
- airflow/providers/standard/hooks/filesystem.py +0 -89
- airflow/providers/standard/hooks/package_index.py +0 -95
- airflow/providers/standard/hooks/subprocess.py +0 -119
- airflow/providers/standard/operators/empty.py +0 -39
- airflow/providers/standard/operators/generic_transfer.py +0 -138
- airflow/providers/standard/operators/latest_only.py +0 -83
- airflow/providers/standard/operators/python.py +0 -1132
- airflow/providers/standard/operators/trigger_dagrun.py +0 -292
- airflow/providers/standard/sensors/external_task.py +0 -509
- airflow/providers/standard/sensors/filesystem.py +0 -158
- airflow/providers/standard/sensors/python.py +0 -85
- airflow/providers/standard/triggers/__init__.py +0 -16
- airflow/providers/standard/triggers/external_task.py +0 -211
- airflow/providers/standard/triggers/file.py +0 -131
- airflow/providers/standard/triggers/temporal.py +0 -114
- airflow/providers/standard/utils/__init__.py +0 -16
- airflow/providers/standard/utils/python_virtualenv.py +0 -209
- airflow/providers/standard/utils/python_virtualenv_script.jinja2 +0 -77
- airflow/providers/standard/utils/sensor_helper.py +0 -119
- airflow/providers/standard/version_compat.py +0 -36
- apache_airflow_providers_standard-0.1.0rc1.dist-info/RECORD +0 -38
- {apache_airflow_providers_standard-0.1.0rc1.dist-info → apache_airflow_providers_standard-1.0.0.dev1.dist-info}/entry_points.txt +0 -0
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
# or more contributor license agreements. See the NOTICE file
|
|
4
|
-
# distributed with this work for additional information
|
|
5
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
# to you under the Apache License, Version 2.0 (the
|
|
7
|
-
# "License"); you may not use this file except in compliance
|
|
8
|
-
# with the License. You may obtain a copy of the License at
|
|
9
|
-
#
|
|
10
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
#
|
|
12
|
-
# Unless required by applicable law or agreed to in writing,
|
|
13
|
-
# software distributed under the License is distributed on an
|
|
14
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
# KIND, either express or implied. See the License for the
|
|
16
|
-
# specific language governing permissions and limitations
|
|
17
|
-
# under the License.
|
|
18
|
-
from __future__ import annotations
|
|
19
|
-
|
|
20
|
-
from collections.abc import Sequence
|
|
21
|
-
from typing import TYPE_CHECKING
|
|
22
|
-
|
|
23
|
-
from airflow.hooks.base import BaseHook
|
|
24
|
-
from airflow.models import BaseOperator
|
|
25
|
-
|
|
26
|
-
if TYPE_CHECKING:
|
|
27
|
-
try:
|
|
28
|
-
from airflow.sdk.definitions.context import Context
|
|
29
|
-
except ImportError:
|
|
30
|
-
# TODO: Remove once provider drops support for Airflow 2
|
|
31
|
-
from airflow.utils.context import Context
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class GenericTransfer(BaseOperator):
    """
    Moves data from a connection to another.

    Assuming that they both provide the required methods in their respective hooks.
    The source hook needs to expose a `get_records` method, and the destination a
    `insert_rows` method. When a ``preoperator`` is given, the destination hook
    must also expose a `run` method.

    Every required hook method is looked up before any SQL is executed, so a hook
    that lacks one of them fails the task before the source query or the
    preoperator has run.

    This is meant to be used on small-ish datasets that fit in memory.

    :param sql: SQL query to execute against the source database. (templated)
    :param destination_table: target table. (templated)
    :param source_conn_id: source connection. (templated)
    :param source_hook_params: parameters forwarded when the source hook is created.
    :param destination_conn_id: destination connection. (templated)
    :param destination_hook_params: parameters forwarded when the destination hook is created.
    :param preoperator: sql statement or list of statements to be
        executed prior to loading the data. (templated)
    :param insert_args: extra params for `insert_rows` method. (templated)
    """

    template_fields: Sequence[str] = (
        "source_conn_id",
        "destination_conn_id",
        "sql",
        "destination_table",
        "preoperator",
        "insert_args",
    )
    template_ext: Sequence[str] = (
        ".sql",
        ".hql",
    )
    template_fields_renderers = {"preoperator": "sql"}
    ui_color = "#b0f07c"

    def __init__(
        self,
        *,
        sql: str,
        destination_table: str,
        source_conn_id: str,
        source_hook_params: dict | None = None,
        destination_conn_id: str,
        destination_hook_params: dict | None = None,
        preoperator: str | list[str] | None = None,
        insert_args: dict | None = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.sql = sql
        self.destination_table = destination_table
        self.source_conn_id = source_conn_id
        self.source_hook_params = source_hook_params
        self.destination_conn_id = destination_conn_id
        self.destination_hook_params = destination_hook_params
        self.preoperator = preoperator
        # Normalise a missing value to an empty dict so it can be splatted
        # into ``insert_rows`` unconditionally.
        self.insert_args = insert_args or {}

    @classmethod
    def get_hook(cls, conn_id: str, hook_params: dict | None = None) -> BaseHook:
        """
        Return default hook for this connection id.

        :param conn_id: connection id
        :param hook_params: hook parameters
        :return: default hook for this connection
        """
        connection = BaseHook.get_connection(conn_id)
        return connection.get_hook(hook_params=hook_params)

    @staticmethod
    def _require_hook_method(hook: BaseHook, conn_id: str, method_name: str):
        """
        Return the callable attribute ``method_name`` of ``hook``.

        :param hook: hook instance to inspect
        :param conn_id: connection id the hook was built from (used in the error message)
        :param method_name: name of the method the hook must provide
        :return: the bound callable
        :raises RuntimeError: if the attribute is missing or not callable
        """
        method = getattr(hook, method_name, None)
        if not callable(method):
            raise RuntimeError(
                f"Hook for connection {conn_id!r} "
                f"({type(hook).__name__}) has no `{method_name}` method"
            )
        return method

    def execute(self, context: Context):
        """
        Read rows from the source, optionally run the preoperator, then insert the rows.

        :param context: task execution context (not read by this method)
        :raises RuntimeError: if the source hook has no `get_records`, or the
            destination hook has no `insert_rows` (or no `run` while a
            preoperator is set)
        """
        source_hook = self.get_hook(conn_id=self.source_conn_id, hook_params=self.source_hook_params)
        destination_hook = self.get_hook(
            conn_id=self.destination_conn_id, hook_params=self.destination_hook_params
        )

        # Resolve every required method up front: failing here is cheap, whereas
        # failing after the source query or the preoperator has already run
        # leaves work half-done on the destination.
        get_records = self._require_hook_method(source_hook, self.source_conn_id, "get_records")
        run = None
        if self.preoperator:
            run = self._require_hook_method(destination_hook, self.destination_conn_id, "run")
        insert_rows = self._require_hook_method(destination_hook, self.destination_conn_id, "insert_rows")

        self.log.info("Extracting data from %s", self.source_conn_id)
        self.log.info("Executing: \n %s", self.sql)
        results = get_records(self.sql)

        if run is not None:
            self.log.info("Running preoperator")
            self.log.info(self.preoperator)
            run(self.preoperator)

        self.log.info("Inserting rows into %s", self.destination_conn_id)
        insert_rows(table=self.destination_table, rows=results, **self.insert_args)
|
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
# or more contributor license agreements. See the NOTICE file
|
|
4
|
-
# distributed with this work for additional information
|
|
5
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
# to you under the Apache License, Version 2.0 (the
|
|
7
|
-
# "License"); you may not use this file except in compliance
|
|
8
|
-
# with the License. You may obtain a copy of the License at
|
|
9
|
-
#
|
|
10
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
#
|
|
12
|
-
# Unless required by applicable law or agreed to in writing,
|
|
13
|
-
# software distributed under the License is distributed on an
|
|
14
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
# KIND, either express or implied. See the License for the
|
|
16
|
-
# specific language governing permissions and limitations
|
|
17
|
-
# under the License.
|
|
18
|
-
"""Contains an operator to run downstream tasks only for the latest scheduled DagRun."""
|
|
19
|
-
|
|
20
|
-
from __future__ import annotations
|
|
21
|
-
|
|
22
|
-
from collections.abc import Iterable
|
|
23
|
-
from typing import TYPE_CHECKING
|
|
24
|
-
|
|
25
|
-
import pendulum
|
|
26
|
-
|
|
27
|
-
from airflow.operators.branch import BaseBranchOperator
|
|
28
|
-
|
|
29
|
-
if TYPE_CHECKING:
|
|
30
|
-
from airflow.models import DAG, DagRun
|
|
31
|
-
|
|
32
|
-
try:
|
|
33
|
-
from airflow.sdk.definitions.context import Context
|
|
34
|
-
except ImportError:
|
|
35
|
-
# TODO: Remove once provider drops support for Airflow 2
|
|
36
|
-
from airflow.utils.context import Context
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
class LatestOnlyOperator(BaseBranchOperator):
    """
    Let downstream tasks run only for the most recent scheduled DagRun.

    For a run that is not externally triggered, the current UTC time is compared
    with the data interval reported by ``dag.next_dagrun_info`` for this run.
    When the current time lies outside that interval, no branch is selected.

    Every directly downstream task is allowed to proceed when the DagRun is
    marked as externally triggered, or when ``dag.next_dagrun_info`` returns
    ``None``.
    """

    ui_color = "#e9ffdb"  # nyanza

    def choose_branch(self, context: Context) -> str | Iterable[str]:
        """
        Select which directly downstream tasks may run.

        :param context: task execution context; ``dag_run``, ``dag`` and ``task`` are read from it
        :return: ids of all directly downstream tasks, or an empty list when
            this run is not the latest one
        """

        def downstream_ids() -> list[str]:
            # All task ids directly downstream of the current task.
            return list(context["task"].get_direct_relative_ids(upstream=False))

        current_run: DagRun = context["dag_run"]  # type: ignore[assignment]
        # Externally triggered runs are never filtered.
        if current_run.external_trigger:
            self.log.info("Externally triggered DAG_Run: allowing execution to proceed.")
            return downstream_ids()

        current_dag: DAG = context["dag"]  # type: ignore[assignment]
        run_interval = current_dag.get_run_data_interval(current_run)
        following = current_dag.next_dagrun_info(run_interval, restricted=False)
        now = pendulum.now("UTC")

        if following is None:
            self.log.info("Last scheduled execution: allowing execution to proceed.")
            return downstream_ids()

        window_start, window_end = following.data_interval
        self.log.info(
            "Checking latest only with left_window: %s right_window: %s now: %s",
            window_start,
            window_end,
            now,
        )

        if window_start < now <= window_end:
            self.log.info("Latest, allowing execution to proceed.")
            return downstream_ids()

        self.log.info("Not latest execution, skipping downstream.")
        # An empty list selects no branch, so the parent BaseBranchOperator
        # exempts no downstream task from being skipped.
        return []
|