apache-airflow-providers-databricks 4.1.0rc1__tar.gz → 4.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apache-airflow-providers-databricks-4.1.0rc1/apache_airflow_providers_databricks.egg-info → apache-airflow-providers-databricks-4.2.0}/PKG-INFO +44 -10
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/README.rst +42 -8
- apache-airflow-providers-databricks-4.2.0/airflow/providers/databricks/__init__.py +41 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/get_provider_info.py +10 -4
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/hooks/databricks.py +19 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/operators/databricks.py +48 -4
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/operators/databricks_sql.py +3 -0
- apache-airflow-providers-databricks-4.2.0/airflow/providers/databricks/sensors/databricks_partition.py +228 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/triggers/databricks.py +36 -5
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0/apache_airflow_providers_databricks.egg-info}/PKG-INFO +44 -10
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/apache_airflow_providers_databricks.egg-info/SOURCES.txt +1 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/apache_airflow_providers_databricks.egg-info/requires.txt +2 -2
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/pyproject.toml +3 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/setup.cfg +4 -4
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/setup.py +1 -1
- apache-airflow-providers-databricks-4.1.0rc1/airflow/providers/databricks/triggers/__init__.py +0 -17
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/LICENSE +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/MANIFEST.in +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/NOTICE +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1/airflow/providers/databricks → apache-airflow-providers-databricks-4.2.0/airflow/providers/databricks/hooks}/__init__.py +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/hooks/databricks_base.py +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/hooks/databricks_sql.py +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1/airflow/providers/databricks/hooks → apache-airflow-providers-databricks-4.2.0/airflow/providers/databricks/operators}/__init__.py +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/operators/databricks_repos.py +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/sensors/__init__.py +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/sensors/databricks_sql.py +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1/airflow/providers/databricks/operators → apache-airflow-providers-databricks-4.2.0/airflow/providers/databricks/triggers}/__init__.py +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/utils/__init__.py +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/utils/databricks.py +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/apache_airflow_providers_databricks.egg-info/dependency_links.txt +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/apache_airflow_providers_databricks.egg-info/entry_points.txt +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/apache_airflow_providers_databricks.egg-info/not-zip-safe +0 -0
- {apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/apache_airflow_providers_databricks.egg-info/top_level.txt +0 -0
{apache-airflow-providers-databricks-4.1.0rc1/apache_airflow_providers_databricks.egg-info → apache-airflow-providers-databricks-4.2.0}/PKG-INFO
RENAMED
@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-databricks
-Version: 4.
+Version: 4.2.0
 Summary: Provider for Apache Airflow. Implements apache-airflow-providers-databricks package
 Home-page: https://airflow.apache.org/
 Download-URL: https://archive.apache.org/dist/airflow/providers
 Author: Apache Software Foundation
 Author-email: dev@airflow.apache.org
 License: Apache License 2.0
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.2.0/
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
@@ -53,7 +53,7 @@ License-File: NOTICE

 Package ``apache-airflow-providers-databricks``

-Release: ``4.
+Release: ``4.2.0``


 `Databricks <https://databricks.com/>`__
@@ -66,7 +66,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.2.0/>`_.


 Installation
@@ -84,8 +84,8 @@ Requirements
 ======================================= ===================
 PIP package                             Version required
 ======================================= ===================
-``apache-airflow``                      ``>=2.
-``apache-airflow-providers-common-sql`` ``>=1.
+``apache-airflow``                      ``>=2.4.0``
+``apache-airflow-providers-common-sql`` ``>=1.5.0``
 ``requests``                            ``>=2.27,<3``
 ``databricks-sql-connector``            ``>=2.0.0, <3.0.0``
 ``aiohttp``                             ``>=3.6.3, <4``
@@ -136,6 +136,38 @@ Dependent package
 Changelog
 ---------

+4.2.0
+.....
+
+.. note::
+  This release of provider is only available for Airflow 2.4+ as explained in the
+  `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_.
+
+Features
+~~~~~~~~
+
+* ``Add conditional output processing in SQL operators (#31136)``
+* ``Add cancel all runs functionality to Databricks hook (#31038)``
+* ``Add retry param in databrics async operator (#30744)``
+* ``Add repair job functionality to databricks hook (#30786)``
+* ``Add 'DatabricksPartitionSensor' (#30980)``
+
+Misc
+~~~~
+
+* ``Bump minimum Airflow version in providers (#30917)``
+* ``Deprecate databricks async operator (#30761)``
+
+.. Below changes are excluded from the changelog. Move them to
+   appropriate section above if needed. Do not delete the lines(!):
+   * ``Move TaskInstanceKey to a separate file (#31033)``
+   * ``Use 'AirflowProviderDeprecationWarning' in providers (#30975)``
+   * ``Add full automation for min Airflow version for providers (#30994)``
+   * ``Add cli cmd to list the provider trigger info (#30822)``
+   * ``Use '__version__' in providers not 'version' (#31393)``
+   * ``Fixing circular import error in providers caused by airflow version check (#31379)``
+   * ``Prepare docs for May 2023 wave of Providers (#31252)``
+
 4.1.0
 .....

@@ -203,8 +235,9 @@ Bug Fixes
 3.4.0
 .....

-
-
+.. note::
+  This release of provider is only available for Airflow 2.3+ as explained in the
+  `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_.

 Misc
 ~~~~
@@ -300,8 +333,9 @@ Bug Fixes
 Breaking changes
 ~~~~~~~~~~~~~~~~

-
-
+.. note::
+  This release of provider is only available for Airflow 2.2+ as explained in the
+  `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_.

 Features
 ~~~~~~~~
{apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/README.rst
RENAMED
@@ -19,7 +19,7 @@

 Package ``apache-airflow-providers-databricks``

-Release: ``4.
+Release: ``4.2.0``


 `Databricks <https://databricks.com/>`__
@@ -32,7 +32,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.2.0/>`_.


 Installation
@@ -50,8 +50,8 @@ Requirements
 ======================================= ===================
 PIP package                             Version required
 ======================================= ===================
-``apache-airflow``                      ``>=2.
-``apache-airflow-providers-common-sql`` ``>=1.
+``apache-airflow``                      ``>=2.4.0``
+``apache-airflow-providers-common-sql`` ``>=1.5.0``
 ``requests``                            ``>=2.27,<3``
 ``databricks-sql-connector``            ``>=2.0.0, <3.0.0``
 ``aiohttp``                             ``>=3.6.3, <4``
@@ -102,6 +102,38 @@ Dependent package
 Changelog
 ---------

+4.2.0
+.....
+
+.. note::
+  This release of provider is only available for Airflow 2.4+ as explained in the
+  `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_.
+
+Features
+~~~~~~~~
+
+* ``Add conditional output processing in SQL operators (#31136)``
+* ``Add cancel all runs functionality to Databricks hook (#31038)``
+* ``Add retry param in databrics async operator (#30744)``
+* ``Add repair job functionality to databricks hook (#30786)``
+* ``Add 'DatabricksPartitionSensor' (#30980)``
+
+Misc
+~~~~
+
+* ``Bump minimum Airflow version in providers (#30917)``
+* ``Deprecate databricks async operator (#30761)``
+
+.. Below changes are excluded from the changelog. Move them to
+   appropriate section above if needed. Do not delete the lines(!):
+   * ``Move TaskInstanceKey to a separate file (#31033)``
+   * ``Use 'AirflowProviderDeprecationWarning' in providers (#30975)``
+   * ``Add full automation for min Airflow version for providers (#30994)``
+   * ``Add cli cmd to list the provider trigger info (#30822)``
+   * ``Use '__version__' in providers not 'version' (#31393)``
+   * ``Fixing circular import error in providers caused by airflow version check (#31379)``
+   * ``Prepare docs for May 2023 wave of Providers (#31252)``
+
 4.1.0
 .....

@@ -169,8 +201,9 @@ Bug Fixes
 3.4.0
 .....

-
-
+.. note::
+  This release of provider is only available for Airflow 2.3+ as explained in the
+  `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_.

 Misc
 ~~~~
@@ -266,8 +299,9 @@ Bug Fixes
 Breaking changes
 ~~~~~~~~~~~~~~~~

-
-
+.. note::
+  This release of provider is only available for Airflow 2.2+ as explained in the
+  `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_.

 Features
 ~~~~~~~~
apache-airflow-providers-databricks-4.2.0/airflow/providers/databricks/__init__.py
ADDED
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE
+# OVERWRITTEN WHEN PREPARING DOCUMENTATION FOR THE PACKAGES.
+#
+# IF YOU WANT TO MODIFY IT, YOU SHOULD MODIFY THE TEMPLATE
+# `PROVIDER__INIT__PY_TEMPLATE.py.jinja2` IN the `dev/provider_packages` DIRECTORY
+#
+from __future__ import annotations
+
+import packaging.version
+
+__all__ = ["__version__"]
+
+__version__ = "4.2.0"
+
+try:
+    from airflow import __version__ as airflow_version
+except ImportError:
+    from airflow.version import version as airflow_version
+
+if packaging.version.parse(airflow_version) < packaging.version.parse("2.4.0"):
+    raise RuntimeError(
+        f"The package `apache-airflow-providers-databricks:{__version__}` requires Apache Airflow 2.4.0+"  # NOQA: E501
+    )
{apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/get_provider_info.py
RENAMED
@@ -29,6 +29,7 @@ def get_provider_info():
         "description": "`Databricks <https://databricks.com/>`__\n",
         "suspended": False,
         "versions": [
+            "4.2.0",
             "4.1.0",
             "4.0.1",
             "4.0.0",
@@ -51,8 +52,8 @@ def get_provider_info():
             "1.0.0",
         ],
         "dependencies": [
-            "apache-airflow>=2.
-            "apache-airflow-providers-common-sql>=1.
+            "apache-airflow>=2.4.0",
+            "apache-airflow-providers-common-sql>=1.5.0",
             "requests>=2.27,<3",
             "databricks-sql-connector>=2.0.0, <3.0.0",
             "aiohttp>=3.6.3, <4",
@@ -120,13 +121,18 @@ def get_provider_info():
         "triggers": [
             {
                 "integration-name": "Databricks",
-                "
+                "class-names": [
+                    "airflow.providers.databricks.triggers.databricks.DatabricksExecutionTrigger"
+                ],
             }
         ],
         "sensors": [
             {
                 "integration-name": "Databricks",
-                "python-modules": [
+                "python-modules": [
+                    "airflow.providers.databricks.sensors.databricks_sql",
+                    "airflow.providers.databricks.sensors.databricks_partition",
+                ],
             }
         ],
         "connection-types": [
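The regenerated provider metadata above is what Airflow's provider manager reads to discover the new partition sensor module and the trigger class. A small illustration of inspecting it directly (not part of the diff; the printed values follow from the hunks above):

# Illustration only: read the provider metadata shipped with 4.2.0.
from airflow.providers.databricks.get_provider_info import get_provider_info

info = get_provider_info()
print(info["versions"][0])                   # "4.2.0" is now the first entry
print(info["sensors"][0]["python-modules"])  # now lists sensors.databricks_partition as well
print(info["triggers"][0]["class-names"])    # DatabricksExecutionTrigger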
{apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/hooks/databricks.py
RENAMED
@@ -44,7 +44,9 @@ SUBMIT_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/submit")
 GET_RUN_ENDPOINT = ("GET", "api/2.1/jobs/runs/get")
 CANCEL_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/cancel")
 DELETE_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/delete")
+REPAIR_RUN_ENDPOINT = ("POST", "api/2.1/jobs/runs/repair")
 OUTPUT_RUNS_JOB_ENDPOINT = ("GET", "api/2.1/jobs/runs/get-output")
+CANCEL_ALL_RUNS_ENDPOINT = ("POST", "api/2.1/jobs/runs/cancel-all")

 INSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/install")
 UNINSTALL_LIBS_ENDPOINT = ("POST", "api/2.0/libraries/uninstall")
@@ -352,6 +354,15 @@ class DatabricksHook(BaseDatabricksHook):
         json = {"run_id": run_id}
         self._do_api_call(CANCEL_RUN_ENDPOINT, json)

+    def cancel_all_runs(self, job_id: int) -> None:
+        """
+        Cancels all active runs of a job. The runs are canceled asynchronously.
+
+        :param job_id: The canonical identifier of the job to cancel all runs of
+        """
+        json = {"job_id": job_id}
+        self._do_api_call(CANCEL_ALL_RUNS_ENDPOINT, json)
+
     def delete_run(self, run_id: int) -> None:
         """
         Deletes a non-active run.
@@ -361,6 +372,14 @@ class DatabricksHook(BaseDatabricksHook):
         json = {"run_id": run_id}
         self._do_api_call(DELETE_RUN_ENDPOINT, json)

+    def repair_run(self, json: dict) -> None:
+        """
+        Re-run one or more tasks.
+
+        :param json: repair a job run.
+        """
+        self._do_api_call(REPAIR_RUN_ENDPOINT, json)
+
     def restart_cluster(self, json: dict) -> None:
         """
         Restarts the cluster.
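The two new hook methods are thin wrappers over the Jobs 2.1 endpoints registered above. A minimal usage sketch, not part of the diff: the connection name, job_id, run_id and the repair payload are placeholders, and repair_run forwards its dictionary unchanged, so the payload must follow the Databricks Jobs API "repair" request schema.

# Sketch only; assumes an Airflow connection named "databricks_default" exists.
from airflow.providers.databricks.hooks.databricks import DatabricksHook

hook = DatabricksHook(databricks_conn_id="databricks_default")

# Cancel every active run of a job (POST api/2.1/jobs/runs/cancel-all).
hook.cancel_all_runs(job_id=1234)

# Repair (re-run) tasks of an existing run (POST api/2.1/jobs/runs/repair);
# run_id and the task key below are illustrative values.
hook.repair_run(json={"run_id": 5678, "rerun_tasks": ["my_task"]})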
{apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/operators/databricks.py
RENAMED
@@ -19,22 +19,24 @@
 from __future__ import annotations

 import time
+import warnings
 from logging import Logger
 from typing import TYPE_CHECKING, Any, Sequence

 from airflow.compat.functools import cached_property
-from airflow.exceptions import AirflowException
+from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator, BaseOperatorLink, XCom
 from airflow.providers.databricks.hooks.databricks import DatabricksHook, RunState
 from airflow.providers.databricks.triggers.databricks import DatabricksExecutionTrigger
 from airflow.providers.databricks.utils.databricks import normalise_json_content, validate_trigger_event

 if TYPE_CHECKING:
-    from airflow.models.
+    from airflow.models.taskinstancekey import TaskInstanceKey
     from airflow.utils.context import Context

 DEFER_METHOD_NAME = "execute_complete"
 XCOM_RUN_ID_KEY = "run_id"
+XCOM_JOB_ID_KEY = "job_id"
 XCOM_RUN_PAGE_URL_KEY = "run_page_url"


@@ -103,6 +105,9 @@ def _handle_deferrable_databricks_operator_execution(operator, hook, log, contex
     :param operator: Databricks async operator being handled
     :param context: Airflow context
     """
+    job_id = hook.get_job_id(operator.run_id)
+    if operator.do_xcom_push and context is not None:
+        context["ti"].xcom_push(key=XCOM_JOB_ID_KEY, value=job_id)
     if operator.do_xcom_push and context is not None:
         context["ti"].xcom_push(key=XCOM_RUN_ID_KEY, value=operator.run_id)
     log.info("Run submitted with run_id: %s", operator.run_id)
@@ -118,6 +123,10 @@ def _handle_deferrable_databricks_operator_execution(operator, hook, log, contex
             run_id=operator.run_id,
             databricks_conn_id=operator.databricks_conn_id,
             polling_period_seconds=operator.polling_period_seconds,
+            retry_limit=operator.databricks_retry_limit,
+            retry_delay=operator.databricks_retry_delay,
+            retry_args=operator.databricks_retry_args,
+            run_page_url=run_page_url,
         ),
         method_name=DEFER_METHOD_NAME,
     )
@@ -267,6 +276,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
     :param do_xcom_push: Whether we should push run_id and run_page_url to xcom.
     :param git_source: Optional specification of a remote git repository from which
         supported task types are retrieved.
+    :param deferrable: Run operator in the deferrable mode.

     .. seealso::
         https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsSubmit
@@ -306,6 +316,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
         access_control_list: list[dict[str, str]] | None = None,
         wait_for_termination: bool = True,
         git_source: dict[str, str] | None = None,
+        deferrable: bool = False,
         **kwargs,
     ) -> None:
         """Creates a new ``DatabricksSubmitRunOperator``."""
@@ -317,6 +328,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
         self.databricks_retry_delay = databricks_retry_delay
         self.databricks_retry_args = databricks_retry_args
         self.wait_for_termination = wait_for_termination
+        self.deferrable = deferrable
         if tasks is not None:
             self.json["tasks"] = tasks
         if spark_jar_task is not None:
@@ -373,7 +385,10 @@ class DatabricksSubmitRunOperator(BaseOperator):
     def execute(self, context: Context):
         json_normalised = normalise_json_content(self.json)
         self.run_id = self._hook.submit_run(json_normalised)
-
+        if self.deferrable:
+            _handle_deferrable_databricks_operator_execution(self, self._hook, self.log, context)
+        else:
+            _handle_databricks_operator_execution(self, self._hook, self.log, context)

     def on_kill(self):
         if self.run_id:
@@ -384,10 +399,23 @@ class DatabricksSubmitRunOperator(BaseOperator):
         else:
             self.log.error("Error: Task: %s with invalid run_id was requested to be cancelled.", self.task_id)

+    def execute_complete(self, context: dict | None, event: dict):
+        _handle_deferrable_databricks_operator_completion(event, self.log)
+

 class DatabricksSubmitRunDeferrableOperator(DatabricksSubmitRunOperator):
     """Deferrable version of ``DatabricksSubmitRunOperator``"""

+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "`DatabricksSubmitRunDeferrableOperator` has been deprecated. "
+            "Please use `airflow.providers.databricks.operators.DatabricksSubmitRunOperator` with "
+            "`deferrable=True` instead.",
+            AirflowProviderDeprecationWarning,
+            stacklevel=2,
+        )
+        super().__init__(deferrable=True, *args, **kwargs)
+
     def execute(self, context):
         hook = self._get_hook(caller="DatabricksSubmitRunDeferrableOperator")
         json_normalised = normalise_json_content(self.json)
@@ -549,6 +577,7 @@ class DatabricksRunNowOperator(BaseOperator):
     :param databricks_retry_args: An optional dictionary with arguments passed to ``tenacity.Retrying`` class.
     :param do_xcom_push: Whether we should push run_id and run_page_url to xcom.
     :param wait_for_termination: if we should wait for termination of the job run. ``True`` by default.
+    :param deferrable: Run operator in the deferrable mode.
     """

     # Used in airflow.models.BaseOperator
@@ -578,6 +607,7 @@ class DatabricksRunNowOperator(BaseOperator):
         databricks_retry_args: dict[Any, Any] | None = None,
         do_xcom_push: bool = True,
         wait_for_termination: bool = True,
+        deferrable: bool = False,
         **kwargs,
     ) -> None:
         """Creates a new ``DatabricksRunNowOperator``."""
@@ -589,6 +619,7 @@ class DatabricksRunNowOperator(BaseOperator):
         self.databricks_retry_delay = databricks_retry_delay
         self.databricks_retry_args = databricks_retry_args
         self.wait_for_termination = wait_for_termination
+        self.deferrable = deferrable

         if job_id is not None:
             self.json["job_id"] = job_id
@@ -636,7 +667,10 @@ class DatabricksRunNowOperator(BaseOperator):
             self.json["job_id"] = job_id
             del self.json["job_name"]
         self.run_id = hook.run_now(self.json)
-
+        if self.deferrable:
+            _handle_deferrable_databricks_operator_execution(self, hook, self.log, context)
+        else:
+            _handle_databricks_operator_execution(self, hook, self.log, context)

     def on_kill(self):
         if self.run_id:
@@ -651,6 +685,16 @@ class DatabricksRunNowOperator(BaseOperator):
 class DatabricksRunNowDeferrableOperator(DatabricksRunNowOperator):
     """Deferrable version of ``DatabricksRunNowOperator``"""

+    def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "`DatabricksRunNowDeferrableOperator` has been deprecated. "
+            "Please use `airflow.providers.databricks.operators.DatabricksRunNowOperator` with "
+            "`deferrable=True` instead.",
+            AirflowProviderDeprecationWarning,
+            stacklevel=2,
+        )
+        super().__init__(deferrable=True, *args, **kwargs)
+
     def execute(self, context):
         hook = self._get_hook(caller="DatabricksRunNowDeferrableOperator")
         self.run_id = hook.run_now(self.json)
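With these changes the plain operators gain a deferrable switch, and the dedicated deferrable classes only emit a deprecation warning before delegating to it. A usage sketch, not part of the diff; the task id, connection name and job id are placeholders:

from airflow.providers.databricks.operators.databricks import DatabricksRunNowOperator

run_job = DatabricksRunNowOperator(
    task_id="run_databricks_job",
    databricks_conn_id="databricks_default",
    job_id=1234,
    deferrable=True,  # submit the run, then wait in the triggerer instead of the worker
)

When deferrable=True, execute() submits the run, pushes job_id and run_id to XCom, and defers to DatabricksExecutionTrigger, which now also receives the operator's databricks_retry_* settings.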
{apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/operators/databricks_sql.py
RENAMED
@@ -120,6 +120,9 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
         }
         return DatabricksSqlHook(self.databricks_conn_id, **hook_params)

+    def _should_run_output_processing(self) -> bool:
+        return self.do_xcom_push or bool(self._output_path)
+
     def _process_output(self, results: list[Any], descriptions: list[Sequence[Sequence] | None]) -> list[Any]:
         if not self._output_path:
             return list(zip(descriptions, results))
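The new helper gates output handling: it returns True only when results are pushed to XCom or written to an output file, which is the "conditional output processing" item in the changelog. Equivalent standalone logic, illustration only and not the provider's code path:

def should_run_output_processing(do_xcom_push: bool, output_path: str | None) -> bool:
    # Mirrors DatabricksSqlOperator._should_run_output_processing from the hunk above.
    return do_xcom_push or bool(output_path)

assert should_run_output_processing(False, None) is False
assert should_run_output_processing(True, None) is True
assert should_run_output_processing(False, "/tmp/results.csv") is True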
apache-airflow-providers-databricks-4.2.0/airflow/providers/databricks/sensors/databricks_partition.py
ADDED
@@ -0,0 +1,228 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+"""This module contains Databricks sensors."""
+
+from __future__ import annotations
+
+from datetime import datetime
+from typing import TYPE_CHECKING, Any, Callable, Sequence
+
+from databricks.sql.utils import ParamEscaper
+
+from airflow.compat.functools import cached_property
+from airflow.exceptions import AirflowException
+from airflow.providers.common.sql.hooks.sql import fetch_all_handler
+from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
+from airflow.sensors.base import BaseSensorOperator
+
+if TYPE_CHECKING:
+    from airflow.utils.context import Context
+
+
+class DatabricksPartitionSensor(BaseSensorOperator):
+    """
+    Sensor to detect the presence of table partitions in Databricks.
+
+    :param databricks_conn_id: Reference to :ref:`Databricks
+        connection id<howto/connection:databricks>` (templated), defaults to
+        DatabricksSqlHook.default_conn_name.
+    :param sql_warehouse_name: Optional name of Databricks SQL warehouse. If not specified, ``http_path``
+        must be provided as described below, defaults to None
+    :param http_path: Optional string specifying HTTP path of Databricks SQL warehouse or All Purpose cluster.
+        If not specified, it should be either specified in the Databricks connection's
+        extra parameters, or ``sql_warehouse_name`` must be specified.
+    :param session_configuration: An optional dictionary of Spark session parameters. If not specified,
+        it could be specified in the Databricks connection's extra parameters, defaults to None
+    :param http_headers: An optional list of (k, v) pairs
+        that will be set as HTTP headers on every request. (templated).
+    :param catalog: An optional initial catalog to use.
+        Requires Databricks Runtime version 9.0+ (templated), defaults to ""
+    :param schema: An optional initial schema to use.
+        Requires Databricks Runtime version 9.0+ (templated), defaults to "default"
+    :param table_name: Name of the table to check partitions.
+    :param partitions: Name of the partitions to check.
+        Example: {"date": "2023-01-03", "name": ["abc", "def"]}
+    :param partition_operator: Optional comparison operator for partitions, such as >=.
+    :param handler: Handler for DbApiHook.run() to return results, defaults to fetch_all_handler
+    :param client_parameters: Additional parameters internal to Databricks SQL connector parameters.
+    """
+
+    template_fields: Sequence[str] = (
+        "databricks_conn_id",
+        "catalog",
+        "schema",
+        "table_name",
+        "partitions",
+        "http_headers",
+    )
+
+    template_ext: Sequence[str] = (".sql",)
+    template_fields_renderers = {"sql": "sql"}
+
+    def __init__(
+        self,
+        *,
+        databricks_conn_id: str = DatabricksSqlHook.default_conn_name,
+        http_path: str | None = None,
+        sql_warehouse_name: str | None = None,
+        session_configuration=None,
+        http_headers: list[tuple[str, str]] | None = None,
+        catalog: str = "",
+        schema: str = "default",
+        table_name: str,
+        partitions: dict,
+        partition_operator: str = "=",
+        handler: Callable[[Any], Any] = fetch_all_handler,
+        client_parameters: dict[str, Any] | None = None,
+        **kwargs,
+    ) -> None:
+        self.databricks_conn_id = databricks_conn_id
+        self._http_path = http_path
+        self._sql_warehouse_name = sql_warehouse_name
+        self.session_config = session_configuration
+        self.http_headers = http_headers
+        self.catalog = catalog
+        self.schema = schema
+        self.caller = "DatabricksPartitionSensor"
+        self.partitions = partitions
+        self.partition_operator = partition_operator
+        self.table_name = table_name
+        self.client_parameters = client_parameters or {}
+        self.hook_params = kwargs.pop("hook_params", {})
+        self.handler = handler
+        self.escaper = ParamEscaper()
+        super().__init__(**kwargs)
+
+    def _sql_sensor(self, sql):
+        """Executes the supplied SQL statement using the hook object."""
+        hook = self._get_hook
+        sql_result = hook.run(
+            sql,
+            handler=self.handler if self.do_xcom_push else None,
+        )
+        self.log.debug("SQL result: %s", sql_result)
+        return sql_result
+
+    @cached_property
+    def _get_hook(self) -> DatabricksSqlHook:
+        """Creates and returns a DatabricksSqlHook object."""
+        return DatabricksSqlHook(
+            self.databricks_conn_id,
+            self._http_path,
+            self._sql_warehouse_name,
+            self.session_config,
+            self.http_headers,
+            self.catalog,
+            self.schema,
+            self.caller,
+            **self.client_parameters,
+            **self.hook_params,
+        )
+
+    def _check_table_partitions(self) -> list:
+        """
+        The method performs the following:
+        * Generates the fully qualified table name.
+        * Calls the generate partition query.
+        * Based on the result returned by the partition generation method,
+          the _sql_sensor method is called.
+        """
+        if self.table_name.split(".")[0] == "delta":
+            _fully_qualified_table_name = self.table_name
+        else:
+            _fully_qualified_table_name = str(self.catalog + "." + self.schema + "." + self.table_name)
+        self.log.debug("Table name generated from arguments: %s", _fully_qualified_table_name)
+        _joiner_val = " AND "
+        _prefix = f"SELECT 1 FROM {_fully_qualified_table_name} WHERE"
+        _suffix = " LIMIT 1"
+
+        partition_sql = self._generate_partition_query(
+            prefix=_prefix,
+            suffix=_suffix,
+            joiner_val=_joiner_val,
+            opts=self.partitions,
+            table_name=_fully_qualified_table_name,
+            escape_key=False,
+        )
+        return self._sql_sensor(partition_sql)
+
+    def _generate_partition_query(
+        self,
+        prefix: str,
+        suffix: str,
+        joiner_val: str,
+        table_name: str,
+        opts: dict[str, str] | None = None,
+        escape_key: bool = False,
+    ) -> str:
+        """
+        Queries the table for available partitions.
+        Generates the SQL query based on the partition data types.
+        * For a list, it prepares the SQL in the format:
+            column_name in (value1, value2,...)
+        * For a numeric type, it prepares the format:
+            column_name =(or other provided operator such as >=) value
+        * For a date type, it prepares the format:
+            column_name =(or other provided operator such as >=) value
+        Once the filter predicates have been generated like above, the query
+        is prepared to be executed using the prefix and suffix supplied, which are:
+        "SELECT 1 FROM {_fully_qualified_table_name} WHERE" and "LIMIT 1".
+        """
+        partition_columns = self._sql_sensor(f"DESCRIBE DETAIL {table_name}")[0][7]
+        self.log.debug("Partition columns: %s", partition_columns)
+        if len(partition_columns) < 1:
+            raise AirflowException(f"Table {table_name} does not have partitions")
+        formatted_opts = ""
+        if opts is not None and len(opts) > 0:
+            output_list = []
+            for partition_col, partition_value in opts.items():
+                if escape_key:
+                    partition_col = self.escaper.escape_item(partition_col)
+                if partition_col in partition_columns:
+                    if isinstance(partition_value, list):
+                        output_list.append(f"""{partition_col} in {tuple(partition_value)}""")
+                        self.log.debug("List formatting for partitions: %s", output_list)
+                    if isinstance(partition_value, (int, float, complex)):
+                        output_list.append(
+                            f"""{partition_col}{self.partition_operator}{self.escaper.escape_item(partition_value)}"""
+                        )
+                    if isinstance(partition_value, (str, datetime)):
+                        output_list.append(
+                            f"""{partition_col}{self.partition_operator}{self.escaper.escape_item(partition_value)}"""
+                        )
+                else:
+                    raise AirflowException(
+                        f"Column {partition_col} not part of table partitions: {partition_columns}"
+                    )
+        else:
+            # Raises exception if the table does not have any partitions.
+            raise AirflowException("No partitions specified to check with the sensor.")
+        formatted_opts = f"{prefix} {joiner_val.join(output_list)} {suffix}"
+        self.log.debug("Formatted options: %s", formatted_opts)
+
+        return formatted_opts.strip()
+
+    def poke(self, context: Context) -> bool:
+        """Checks the table partitions and returns the results."""
+        partition_result = self._check_table_partitions()
+        self.log.debug("Partition sensor result: %s", partition_result)
+        if len(partition_result) >= 1:
+            return True
+        else:
+            raise AirflowException(f"Specified partition(s): {self.partitions} were not found.")
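A usage sketch for the new sensor, not part of the diff; the connection id, warehouse name, catalog, schema, table and partition value are placeholders. Under the hood the sensor runs DESCRIBE DETAIL to find the partition columns, then issues the "SELECT 1 ... WHERE ... LIMIT 1" probe built by _generate_partition_query, and raises AirflowException when the partition is missing:

from airflow.providers.databricks.sensors.databricks_partition import DatabricksPartitionSensor

wait_for_partition = DatabricksPartitionSensor(
    task_id="wait_for_orders_partition",
    databricks_conn_id="databricks_default",
    sql_warehouse_name="my_warehouse",
    catalog="hive_metastore",
    schema="sales",
    table_name="orders",
    partitions={"date": "2023-01-03"},  # becomes: date = '2023-01-03'
    partition_operator="=",
    timeout=60 * 60,  # standard BaseSensorOperator timeout, in seconds
)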
{apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/airflow/providers/databricks/triggers/databricks.py
RENAMED
@@ -32,14 +32,36 @@ class DatabricksExecutionTrigger(BaseTrigger):
     :param databricks_conn_id: Reference to the :ref:`Databricks connection <howto/connection:databricks>`.
     :param polling_period_seconds: Controls the rate of the poll for the result of this run.
         By default, the trigger will poll every 30 seconds.
+    :param retry_limit: The number of times to retry the connection in case of service outages.
+    :param retry_delay: The number of seconds to wait between retries.
+    :param retry_args: An optional dictionary with arguments passed to ``tenacity.Retrying`` class.
+    :param run_page_url: The run page url.
     """

-    def __init__(
+    def __init__(
+        self,
+        run_id: int,
+        databricks_conn_id: str,
+        polling_period_seconds: int = 30,
+        retry_limit: int = 3,
+        retry_delay: int = 10,
+        retry_args: dict[Any, Any] | None = None,
+        run_page_url: str | None = None,
+    ) -> None:
         super().__init__()
         self.run_id = run_id
         self.databricks_conn_id = databricks_conn_id
         self.polling_period_seconds = polling_period_seconds
-        self.
+        self.retry_limit = retry_limit
+        self.retry_delay = retry_delay
+        self.retry_args = retry_args
+        self.run_page_url = run_page_url
+        self.hook = DatabricksHook(
+            databricks_conn_id,
+            retry_limit=self.retry_limit,
+            retry_delay=self.retry_delay,
+            retry_args=retry_args,
+        )

     def serialize(self) -> tuple[str, dict[str, Any]]:
         return (
@@ -48,22 +70,31 @@ class DatabricksExecutionTrigger(BaseTrigger):
                 "run_id": self.run_id,
                 "databricks_conn_id": self.databricks_conn_id,
                 "polling_period_seconds": self.polling_period_seconds,
+                "retry_limit": self.retry_limit,
+                "retry_delay": self.retry_delay,
+                "retry_args": self.retry_args,
+                "run_page_url": self.run_page_url,
             },
         )

     async def run(self):
         async with self.hook:
-            run_page_url = await self.hook.a_get_run_page_url(self.run_id)
             while True:
                 run_state = await self.hook.a_get_run_state(self.run_id)
                 if run_state.is_terminal:
                     yield TriggerEvent(
                         {
                             "run_id": self.run_id,
+                            "run_page_url": self.run_page_url,
                             "run_state": run_state.to_json(),
-                            "run_page_url": run_page_url,
                         }
                     )
-
+                    return
                 else:
+                    self.log.info(
+                        "run-id %s in run state %s. sleeping for %s seconds",
+                        self.run_id,
+                        run_state,
+                        self.polling_period_seconds,
+                    )
                     await asyncio.sleep(self.polling_period_seconds)
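The trigger now carries the retry policy and the run page URL through serialize(), so the triggerer process rebuilds the DatabricksHook with the same retry settings instead of fetching the URL itself. A construction sketch, not part of the diff; all values are placeholders:

from airflow.providers.databricks.triggers.databricks import DatabricksExecutionTrigger

trigger = DatabricksExecutionTrigger(
    run_id=5678,
    databricks_conn_id="databricks_default",
    polling_period_seconds=30,
    retry_limit=3,
    retry_delay=10,
    retry_args=None,
    run_page_url="https://<workspace>.cloud.databricks.com/#job/1234/run/5678",
)
classpath, kwargs = trigger.serialize()
# kwargs now includes retry_limit, retry_delay, retry_args and run_page_url,
# matching the keys added in the serialize() hunk above.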
{apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0/apache_airflow_providers_databricks.egg-info}/PKG-INFO
RENAMED
@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: apache-airflow-providers-databricks
-Version: 4.
+Version: 4.2.0
 Summary: Provider for Apache Airflow. Implements apache-airflow-providers-databricks package
 Home-page: https://airflow.apache.org/
 Download-URL: https://archive.apache.org/dist/airflow/providers
 Author: Apache Software Foundation
 Author-email: dev@airflow.apache.org
 License: Apache License 2.0
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.2.0/
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
@@ -53,7 +53,7 @@ License-File: NOTICE

 Package ``apache-airflow-providers-databricks``

-Release: ``4.
+Release: ``4.2.0``


 `Databricks <https://databricks.com/>`__
@@ -66,7 +66,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.2.0/>`_.


 Installation
@@ -84,8 +84,8 @@ Requirements
 ======================================= ===================
 PIP package                             Version required
 ======================================= ===================
-``apache-airflow``                      ``>=2.
-``apache-airflow-providers-common-sql`` ``>=1.
+``apache-airflow``                      ``>=2.4.0``
+``apache-airflow-providers-common-sql`` ``>=1.5.0``
 ``requests``                            ``>=2.27,<3``
 ``databricks-sql-connector``            ``>=2.0.0, <3.0.0``
 ``aiohttp``                             ``>=3.6.3, <4``
@@ -136,6 +136,38 @@ Dependent package
 Changelog
 ---------

+4.2.0
+.....
+
+.. note::
+  This release of provider is only available for Airflow 2.4+ as explained in the
+  `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_.
+
+Features
+~~~~~~~~
+
+* ``Add conditional output processing in SQL operators (#31136)``
+* ``Add cancel all runs functionality to Databricks hook (#31038)``
+* ``Add retry param in databrics async operator (#30744)``
+* ``Add repair job functionality to databricks hook (#30786)``
+* ``Add 'DatabricksPartitionSensor' (#30980)``
+
+Misc
+~~~~
+
+* ``Bump minimum Airflow version in providers (#30917)``
+* ``Deprecate databricks async operator (#30761)``
+
+.. Below changes are excluded from the changelog. Move them to
+   appropriate section above if needed. Do not delete the lines(!):
+   * ``Move TaskInstanceKey to a separate file (#31033)``
+   * ``Use 'AirflowProviderDeprecationWarning' in providers (#30975)``
+   * ``Add full automation for min Airflow version for providers (#30994)``
+   * ``Add cli cmd to list the provider trigger info (#30822)``
+   * ``Use '__version__' in providers not 'version' (#31393)``
+   * ``Fixing circular import error in providers caused by airflow version check (#31379)``
+   * ``Prepare docs for May 2023 wave of Providers (#31252)``
+
 4.1.0
 .....

@@ -203,8 +235,9 @@ Bug Fixes
 3.4.0
 .....

-
-
+.. note::
+  This release of provider is only available for Airflow 2.3+ as explained in the
+  `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_.

 Misc
 ~~~~
@@ -300,8 +333,9 @@ Bug Fixes
 Breaking changes
 ~~~~~~~~~~~~~~~~

-
-
+.. note::
+  This release of provider is only available for Airflow 2.2+ as explained in the
+  `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_.

 Features
 ~~~~~~~~
{apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/apache_airflow_providers_databricks.egg-info/SOURCES.txt
RENAMED
@@ -16,6 +16,7 @@ airflow/providers/databricks/operators/databricks.py
 airflow/providers/databricks/operators/databricks_repos.py
 airflow/providers/databricks/operators/databricks_sql.py
 airflow/providers/databricks/sensors/__init__.py
+airflow/providers/databricks/sensors/databricks_partition.py
 airflow/providers/databricks/sensors/databricks_sql.py
 airflow/providers/databricks/triggers/__init__.py
 airflow/providers/databricks/triggers/databricks.py
{apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/pyproject.toml
RENAMED
@@ -46,11 +46,14 @@ extend-select = [
     "UP", # Pyupgrade
     "RUF100", # Unused noqa (auto-fixable)

+    # implicit single-line string concatenation
+    "ISC001",
     # We ignore more pydocstyle than we enable, so be more selective at what we enable
     "D101",
     "D106",
     "D2",
     "D3",
+    # "D400", WIP: see #31135
     # "D401", # Not enabled by ruff, but we don't want it
     "D402",
     "D403",
{apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/setup.cfg
RENAMED
@@ -27,7 +27,7 @@ classifiers =
     Programming Language :: Python :: 3.10
     Topic :: System :: Monitoring
 project_urls =
-    Documentation=https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.
+    Documentation=https://airflow.apache.org/docs/apache-airflow-providers-databricks/4.2.0/
     Bug Tracker=https://github.com/apache/airflow/issues
     Source Code=https://github.com/apache/airflow
     Slack Chat=https://s.apache.org/airflow-slack
@@ -47,8 +47,8 @@ setup_requires =
     wheel
 install_requires =
     aiohttp>=3.6.3, <4
-    apache-airflow-providers-common-sql>=1.
-    apache-airflow>=2.
+    apache-airflow-providers-common-sql>=1.5.0
+    apache-airflow>=2.4.0
     databricks-sql-connector>=2.0.0, <3.0.0
     requests>=2.27,<3

@@ -60,6 +60,6 @@ apache_airflow_provider =
 packages = airflow.providers.databricks

 [egg_info]
-tag_build =
+tag_build =
 tag_date = 0

apache-airflow-providers-databricks-4.1.0rc1/airflow/providers/databricks/triggers/__init__.py
DELETED
@@ -1,17 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
{apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/LICENSE
RENAMED
File without changes
{apache-airflow-providers-databricks-4.1.0rc1 → apache-airflow-providers-databricks-4.2.0}/NOTICE
RENAMED
File without changes