apache-airflow-providers-databricks 7.4.0rc1__tar.gz → 7.5.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apache-airflow-providers-databricks might be problematic.
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/PKG-INFO +17 -15
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/README.rst +13 -11
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/changelog.rst +18 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/index.rst +13 -11
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/sql_statements.rst +46 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/provider.yaml +3 -1
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/pyproject.toml +4 -4
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/__init__.py +1 -1
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/get_provider_info.py +1 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/operators/databricks.py +57 -79
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/operators/databricks_sql.py +112 -3
- apache_airflow_providers_databricks-7.5.0rc1/src/airflow/providers/databricks/sensors/databricks.py +162 -0
- apache_airflow_providers_databricks-7.5.0rc1/src/airflow/providers/databricks/utils/mixins.py +194 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/databricks/example_databricks_sensors.py +14 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/operators/test_databricks.py +154 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/operators/test_databricks_copy.py +177 -0
- apache_airflow_providers_databricks-7.5.0rc1/tests/unit/databricks/sensors/test_databricks.py +208 -0
- apache_airflow_providers_databricks-7.5.0rc1/tests/unit/databricks/utils/test_mixins.py +127 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/.latest-doc-only-change.txt +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/commits.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/conf.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/connections/databricks.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/img/databricks_workflow_task_group_airflow_graph_view.png +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/img/workflow_plugin_launch_task.png +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/img/workflow_plugin_single_task.png +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/img/workflow_run_databricks_graph_view.png +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/installing-providers-from-sources.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/integration-logos/Databricks.png +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/copy_into.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/index.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/jobs_create.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/notebook.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/repos_create.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/repos_delete.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/repos_update.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/run_now.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/sql.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/submit_run.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/task.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/workflow.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/plugins/index.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/plugins/workflow.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/security.rst +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/LICENSE +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/exceptions.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/hooks/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/hooks/databricks.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/hooks/databricks_base.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/hooks/databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/operators/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/operators/databricks_repos.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/operators/databricks_workflow.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/plugins/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/plugins/databricks_workflow.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/sensors/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/sensors/databricks_partition.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/sensors/databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/triggers/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/triggers/databricks.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/utils/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/utils/databricks.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/utils/openlineage.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/version_compat.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/conftest.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/databricks/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/databricks/example_databricks.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/databricks/example_databricks_repos.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/databricks/example_databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/databricks/example_databricks_workflow.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/hooks/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/hooks/test_databricks.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/hooks/test_databricks_azure_workload_identity.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/hooks/test_databricks_azure_workload_identity_async.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/hooks/test_databricks_base.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/hooks/test_databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/operators/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/operators/test_databricks_repos.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/operators/test_databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/operators/test_databricks_workflow.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/plugins/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/plugins/test_databricks_workflow.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/sensors/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/sensors/test_databricks_partition.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/sensors/test_databricks_sql.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/test_exceptions.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/triggers/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/triggers/test_databricks.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/utils/__init__.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/utils/test_databricks.py +0 -0
- {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/utils/test_openlineage.py +0 -0
{apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-databricks
-Version: 7.4.0rc1
+Version: 7.5.0rc1
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>

@@ -23,7 +23,7 @@ Classifier: Topic :: System :: Monitoring
 Requires-Dist: apache-airflow>=2.10.0rc1
 Requires-Dist: apache-airflow-providers-common-compat>=1.6.0rc1
 Requires-Dist: apache-airflow-providers-common-sql>=1.27.0rc1
-Requires-Dist: requests>=2.
+Requires-Dist: requests>=2.32.0,<3
 Requires-Dist: databricks-sql-connector>=3.0.0
 Requires-Dist: databricks-sqlalchemy>=1.0.2
 Requires-Dist: aiohttp>=3.9.2, <4

@@ -36,8 +36,8 @@ Requires-Dist: apache-airflow-providers-openlineage>=2.3.0rc1 ; extra == "openli
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Requires-Dist: apache-airflow-providers-standard ; extra == "standard"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.4.0/changelog.html
-Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.4.0
+Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html
+Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.5.0
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow

@@ -73,7 +73,7 @@ Provides-Extra: standard

 Package ``apache-airflow-providers-databricks``

-Release: ``7.4.0``
+Release: ``7.5.0``


 `Databricks <https://databricks.com/>`__

@@ -86,7 +86,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.4.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/>`_.

 Installation
 ------------

@@ -106,7 +106,7 @@ PIP package Version required
 ``apache-airflow``                          ``>=2.10.0``
 ``apache-airflow-providers-common-compat``  ``>=1.6.0``
 ``apache-airflow-providers-common-sql``     ``>=1.27.0``
-``requests``                                ``>=2.
+``requests``                                ``>=2.32.0,<3``
 ``databricks-sql-connector``                ``>=3.0.0``
 ``databricks-sqlalchemy``                   ``>=1.0.2``
 ``aiohttp``                                 ``>=3.9.2,<4``

@@ -125,16 +125,18 @@ You can install such cross-provider dependencies when installing from PyPI. For

 .. code-block:: bash

-    pip install apache-airflow-providers-databricks[common.
+    pip install apache-airflow-providers-databricks[common.compat]


-
-Dependent package
-
-`apache-airflow-providers-common-
-`apache-airflow-providers-
-
+================================================================================================================== =================
+Dependent package                                                                                                   Extra
+================================================================================================================== =================
+`apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
+`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
+`apache-airflow-providers-fab <https://airflow.apache.org/docs/apache-airflow-providers-fab>`_ ``fab``
+`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
+================================================================================================================== =================

 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.4.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html>`_.

{apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/README.rst

@@ -23,7 +23,7 @@

 Package ``apache-airflow-providers-databricks``

-Release: ``7.4.0``
+Release: ``7.5.0``


 `Databricks <https://databricks.com/>`__

@@ -36,7 +36,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.4.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/>`_.

 Installation
 ------------

@@ -56,7 +56,7 @@ PIP package Version required
 ``apache-airflow``                          ``>=2.10.0``
 ``apache-airflow-providers-common-compat``  ``>=1.6.0``
 ``apache-airflow-providers-common-sql``     ``>=1.27.0``
-``requests``                                ``>=2.
+``requests``                                ``>=2.32.0,<3``
 ``databricks-sql-connector``                ``>=3.0.0``
 ``databricks-sqlalchemy``                   ``>=1.0.2``
 ``aiohttp``                                 ``>=3.9.2,<4``

@@ -75,15 +75,17 @@ You can install such cross-provider dependencies when installing from PyPI. For

 .. code-block:: bash

-    pip install apache-airflow-providers-databricks[common.
+    pip install apache-airflow-providers-databricks[common.compat]


-
-Dependent package
-
-`apache-airflow-providers-common-
-`apache-airflow-providers-
-
+================================================================================================================== =================
+Dependent package                                                                                                   Extra
+================================================================================================================== =================
+`apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
+`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
+`apache-airflow-providers-fab <https://airflow.apache.org/docs/apache-airflow-providers-fab>`_ ``fab``
+`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
+================================================================================================================== =================

 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.4.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html>`_.
{apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/changelog.rst

@@ -26,6 +26,24 @@
 Changelog
 ---------

+7.5.0
+.....
+
+Features
+~~~~~~~~
+
+* ``feat: Add OpenLineage support for DatabricksSQLStatementsOperator (#50891)``
+* ``feat: Add OpenLineage support for DatabricksCopyIntoOperator (#50821)``
+* ``Adding 'DatabricksSQLStatementSensor' Sensor with Deferrability (#49516)``
+
+Misc
+~~~~
+
+* ``Bump some provider dependencies for faster resolution (#51727)``
+
+.. Below changes are excluded from the changelog. Move them to
+   appropriate section above if needed. Do not delete the lines(!):
+
 7.4.0
 .....

{apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/index.rst

@@ -78,7 +78,7 @@ apache-airflow-providers-databricks package
 `Databricks <https://databricks.com/>`__


-Release: 7.4.0
+Release: 7.5.0

 Provider package
 ----------------

@@ -104,7 +104,7 @@ PIP package Version required
 ``apache-airflow``                          ``>=2.10.0``
 ``apache-airflow-providers-common-compat``  ``>=1.6.0``
 ``apache-airflow-providers-common-sql``     ``>=1.27.0``
-``requests``                                ``>=2.
+``requests``                                ``>=2.32.0,<3``
 ``databricks-sql-connector``                ``>=3.0.0``
 ``databricks-sqlalchemy``                   ``>=1.0.2``
 ``aiohttp``                                 ``>=3.9.2,<4``

@@ -123,15 +123,17 @@ You can install such cross-provider dependencies when installing from PyPI. For

 .. code-block:: bash

-    pip install apache-airflow-providers-databricks[common.
+    pip install apache-airflow-providers-databricks[common.compat]


-
-Dependent package
-
-`apache-airflow-providers-common-
-`apache-airflow-providers-
-
+================================================================================================================== =================
+Dependent package                                                                                                   Extra
+================================================================================================================== =================
+`apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
+`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
+`apache-airflow-providers-fab <https://airflow.apache.org/docs/apache-airflow-providers-fab>`_ ``fab``
+`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
+================================================================================================================== =================

 Downloading official packages
 -----------------------------

@@ -139,5 +141,5 @@ Downloading official packages
 You can download officially released packages and verify their checksums and signatures from the
 `Official Apache Download site <https://downloads.apache.org/airflow/providers/>`_

-* `The apache-airflow-providers-databricks 7.
-* `The apache-airflow-providers-databricks 7.
+* `The apache-airflow-providers-databricks 7.5.0 sdist package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0.tar.gz>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0.tar.gz.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0.tar.gz.sha512>`__)
+* `The apache-airflow-providers-databricks 7.5.0 wheel package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0-py3-none-any.whl>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0-py3-none-any.whl.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0-py3-none-any.whl.sha512>`__)
{apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/sql_statements.rst

@@ -55,3 +55,49 @@ An example usage of the ``DatabricksSQLStatementsOperator`` is as follows:
     :language: python
     :start-after: [START howto_operator_sql_statements]
     :end-before: [END howto_operator_sql_statements]
+
+
+.. _howto/sensor:DatabricksSQLStatementsSensor:
+
+DatabricksSQLStatementsSensor
+===============================
+
+Use the :class:`~airflow.providers.databricks.sensors.databricks.DatabricksSQLStatementsSensor` to either submit a
+Databricks SQL Statement to Databricks using the
+`Databricks SQL Statement Execution API <https://docs.databricks.com/api/workspace/statementexecution>`_, or pass
+a Statement ID to the Sensor and wait for the query to terminate execution.
+
+
+Using the Sensor
+------------------
+
+The ``DatabricksSQLStatementsSensor`` does one of two things. The Sensor can submit SQL statements to Databricks using
+the `/api/2.0/sql/statements/ <https://docs.databricks.com/api/workspace/statementexecution/executestatement>`_
+endpoint. Alternatively, the Sensor can take the Statement ID of an already-submitted SQL Statement and handle the
+response to that execution.
+
+It supports configurable execution parameters such as warehouse selection, catalog, schema, and parameterized queries.
+The Sensor can either synchronously poll for query completion or run in a deferrable mode for improved efficiency.
+
+The only required parameters for using the Sensor are:
+
+* One of ``statement`` or ``statement_id`` - the SQL statement to execute (the statement can optionally be
+  parameterized, see ``parameters``), or the ID of an already-submitted statement.
+* ``warehouse_id`` - the warehouse on which to execute the statement.
+
+All other parameters are optional and described in the documentation for ``DatabricksSQLStatementsSensor``, including
+but not limited to:
+
+* ``catalog``
+* ``schema``
+* ``parameters``
+
+Examples
+--------
+
+An example usage of the ``DatabricksSQLStatementsSensor`` is as follows:
+
+.. exampleinclude:: /../../databricks/tests/system/databricks/example_databricks_sensors.py
+    :language: python
+    :start-after: [START howto_sensor_databricks_sql_statement]
+    :end-before: [END howto_sensor_databricks_sql_statement]
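The new documentation above pulls its example from tests/system/databricks/example_databricks_sensors.py, which is not reproduced in this diff. A minimal usage sketch, assuming only the documented parameters (``statement``, ``statement_id``, ``warehouse_id``, ``catalog``, ``schema``, ``deferrable``) plus an assumed ``databricks_conn_id`` connection argument, might look like this:

    # Hypothetical sketch; the real system test shipped with the package is not shown here.
    from datetime import datetime

    from airflow import DAG
    from airflow.providers.databricks.sensors.databricks import DatabricksSQLStatementsSensor

    with DAG(
        dag_id="example_databricks_sql_statement_sensor",
        start_date=datetime(2025, 1, 1),
        schedule=None,
        catchup=False,
    ) as dag:
        # Submit a statement and wait (deferring, if requested) until it reaches a terminal state.
        wait_for_statement = DatabricksSQLStatementsSensor(
            task_id="sql_statement_sensor",
            databricks_conn_id="databricks_default",  # assumed default connection id
            statement="SELECT COUNT(*) FROM default.my_table",  # or pass statement_id=... instead
            warehouse_id="YOUR_WAREHOUSE_ID",
            deferrable=True,
        )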
{apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/provider.yaml

@@ -22,12 +22,13 @@ description: |
   `Databricks <https://databricks.com/>`__

 state: ready
-source-date-epoch:
+source-date-epoch: 1749896357
 # Note that those versions are maintained by release manager - do not update them manually
 # with the exception of case where other provider in sources has >= new provider version.
 # In such case adding >= NEW_VERSION and bumping to NEW_VERSION in a provider have
 # to be done in the same PR
 versions:
+  - 7.5.0
   - 7.4.0
   - 7.3.2
   - 7.3.1

@@ -143,6 +144,7 @@ triggers:
 sensors:
   - integration-name: Databricks
     python-modules:
+      - airflow.providers.databricks.sensors.databricks
      - airflow.providers.databricks.sensors.databricks_sql
      - airflow.providers.databricks.sensors.databricks_partition

{apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/pyproject.toml

@@ -25,7 +25,7 @@ build-backend = "flit_core.buildapi"

 [project]
 name = "apache-airflow-providers-databricks"
-version = "7.4.0rc1"
+version = "7.5.0rc1"
 description = "Provider package apache-airflow-providers-databricks for Apache Airflow"
 readme = "README.rst"
 authors = [

@@ -60,7 +60,7 @@ dependencies = [
     "apache-airflow>=2.10.0rc1",
     "apache-airflow-providers-common-compat>=1.6.0rc1",
     "apache-airflow-providers-common-sql>=1.27.0rc1",
-    "requests>=2.
+    "requests>=2.32.0,<3",
     "databricks-sql-connector>=3.0.0",
     "databricks-sqlalchemy>=1.0.2",
     "aiohttp>=3.9.2, <4",

@@ -130,8 +130,8 @@ apache-airflow-providers-common-sql = {workspace = true}
 apache-airflow-providers-standard = {workspace = true}

 [project.urls]
-"Documentation" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.4.0"
-"Changelog" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.4.0/changelog.html"
+"Documentation" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.5.0"
+"Changelog" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html"
 "Bug Tracker" = "https://github.com/apache/airflow/issues"
 "Source Code" = "https://github.com/apache/airflow"
 "Slack Chat" = "https://s.apache.org/airflow-slack"
{apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/__init__.py

@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version

 __all__ = ["__version__"]

-__version__ = "7.4.0"
+__version__ = "7.5.0"

 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.10.0"
{apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/get_provider_info.py

@@ -107,6 +107,7 @@ def get_provider_info():
             {
                 "integration-name": "Databricks",
                 "python-modules": [
+                    "airflow.providers.databricks.sensors.databricks",
                     "airflow.providers.databricks.sensors.databricks_sql",
                     "airflow.providers.databricks.sensors.databricks_partition",
                 ],
{apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/operators/databricks.py

@@ -34,7 +34,6 @@ from airflow.providers.databricks.hooks.databricks import (
     DatabricksHook,
     RunLifeCycleState,
     RunState,
-    SQLStatementState,
 )
 from airflow.providers.databricks.operators.databricks_workflow import (
     DatabricksWorkflowTaskGroup,

@@ -46,13 +45,14 @@ from airflow.providers.databricks.plugins.databricks_workflow import (
 )
 from airflow.providers.databricks.triggers.databricks import (
     DatabricksExecutionTrigger,
-    DatabricksSQLStatementExecutionTrigger,
 )
 from airflow.providers.databricks.utils.databricks import normalise_json_content, validate_trigger_event
+from airflow.providers.databricks.utils.mixins import DatabricksSQLStatementsMixin
 from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS

 if TYPE_CHECKING:
     from airflow.models.taskinstancekey import TaskInstanceKey
+    from airflow.providers.openlineage.extractors import OperatorLineage
     from airflow.utils.context import Context
     from airflow.utils.task_group import TaskGroup

@@ -978,7 +978,7 @@ class DatabricksRunNowOperator(BaseOperator):
         self.log.error("Error: Task: %s with invalid run_id was requested to be cancelled.", self.task_id)


-class DatabricksSQLStatementsOperator(BaseOperator):
+class DatabricksSQLStatementsOperator(DatabricksSQLStatementsMixin, BaseOperator):
     """
     Submits a Databricks SQL Statement to Databricks using the api/2.0/sql/statements/ API endpoint.

@@ -1073,59 +1073,6 @@ class DatabricksSQLStatementsOperator(BaseOperator):
             caller=caller,
         )

-    def _handle_operator_execution(self) -> None:
-        end_time = time.time() + self.timeout
-        while end_time > time.time():
-            statement_state = self._hook.get_sql_statement_state(self.statement_id)
-            if statement_state.is_terminal:
-                if statement_state.is_successful:
-                    self.log.info("%s completed successfully.", self.task_id)
-                    return
-                error_message = (
-                    f"{self.task_id} failed with terminal state: {statement_state.state} "
-                    f"and with the error code {statement_state.error_code} "
-                    f"and error message {statement_state.error_message}"
-                )
-                raise AirflowException(error_message)
-
-            self.log.info("%s in run state: %s", self.task_id, statement_state.state)
-            self.log.info("Sleeping for %s seconds.", self.polling_period_seconds)
-            time.sleep(self.polling_period_seconds)
-
-        self._hook.cancel_sql_statement(self.statement_id)
-        raise AirflowException(
-            f"{self.task_id} timed out after {self.timeout} seconds with state: {statement_state.state}",
-        )
-
-    def _handle_deferrable_operator_execution(self) -> None:
-        statement_state = self._hook.get_sql_statement_state(self.statement_id)
-        end_time = time.time() + self.timeout
-        if not statement_state.is_terminal:
-            if not self.statement_id:
-                raise AirflowException("Failed to retrieve statement_id after submitting SQL statement.")
-            self.defer(
-                trigger=DatabricksSQLStatementExecutionTrigger(
-                    statement_id=self.statement_id,
-                    databricks_conn_id=self.databricks_conn_id,
-                    end_time=end_time,
-                    polling_period_seconds=self.polling_period_seconds,
-                    retry_limit=self.databricks_retry_limit,
-                    retry_delay=self.databricks_retry_delay,
-                    retry_args=self.databricks_retry_args,
-                ),
-                method_name=DEFER_METHOD_NAME,
-            )
-        else:
-            if statement_state.is_successful:
-                self.log.info("%s completed successfully.", self.task_id)
-            else:
-                error_message = (
-                    f"{self.task_id} failed with terminal state: {statement_state.state} "
-                    f"and with the error code {statement_state.error_code} "
-                    f"and error message {statement_state.error_message}"
-                )
-                raise AirflowException(error_message)
-
     def execute(self, context: Context):
         json = {
             "statement": self.statement,
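The polling and deferral logic removed above is not dropped; per the file list it moves into the new utils/mixins.py (``DatabricksSQLStatementsMixin``, +194 lines, not shown in this diff), whose ``_handle_execution`` and ``_handle_deferrable_execution`` methods are called from ``execute()`` in the next hunk. A rough sketch of the mixin's shape, inferred from the removed methods and the new call sites (everything beyond those two method names and the attributes already used above is an assumption):

    # Illustrative sketch only; the real utils/mixins.py shipped in 7.5.0 is not shown in this diff.
    import time

    from airflow.exceptions import AirflowException
    from airflow.providers.databricks.triggers.databricks import DatabricksSQLStatementExecutionTrigger


    class DatabricksSQLStatementsMixin:
        """Shared polling/deferral behaviour for the SQL Statements operator and sensor."""

        def _handle_execution(self) -> None:
            # Synchronous path: poll get_sql_statement_state() until terminal or timeout,
            # mirroring the _handle_operator_execution body removed above.
            end_time = time.time() + self.timeout
            while end_time > time.time():
                state = self._hook.get_sql_statement_state(self.statement_id)
                if state.is_terminal:
                    if state.is_successful:
                        return
                    raise AirflowException(f"{self.task_id} failed with terminal state: {state.state}")
                time.sleep(self.polling_period_seconds)
            self._hook.cancel_sql_statement(self.statement_id)
            raise AirflowException(f"{self.task_id} timed out after {self.timeout} seconds")

        def _handle_deferrable_execution(self, defer_method_name: str) -> None:
            # Deferrable path: hand off to the trigger unless the statement already finished.
            state = self._hook.get_sql_statement_state(self.statement_id)
            if state.is_terminal:
                if not state.is_successful:
                    raise AirflowException(f"{self.task_id} failed with terminal state: {state.state}")
                return
            self.defer(
                trigger=DatabricksSQLStatementExecutionTrigger(
                    statement_id=self.statement_id,
                    databricks_conn_id=self.databricks_conn_id,
                    end_time=time.time() + self.timeout,
                    polling_period_seconds=self.polling_period_seconds,
                    retry_limit=self.databricks_retry_limit,
                    retry_delay=self.databricks_retry_delay,
                    retry_args=self.databricks_retry_args,
                ),
                method_name=defer_method_name,
            )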
@@ -1146,34 +1093,65 @@ class DatabricksSQLStatementsOperator(BaseOperator):
         if not self.wait_for_termination:
             return
         if self.deferrable:
-            self.
+            self._handle_deferrable_execution(defer_method_name=DEFER_METHOD_NAME)  # type: ignore[misc]
         else:
-            self.
+            self._handle_execution()  # type: ignore[misc]

-    def
+    def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
+        """Implement _on_complete because we use statement_id."""
+        from airflow.providers.common.compat.openlineage.facet import (
+            ExternalQueryRunFacet,
+            SQLJobFacet,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+        from airflow.providers.openlineage.sqlparser import DatabaseInfo, SQLParser
+
+        db_info = DatabaseInfo(
+            scheme="databricks",
+            authority=self._hook.host,
+            database=self.catalog,
+            is_uppercase_names=False,
+            # Other args will not be used as we'll not query DB for details, we only do sql parsing.
+        )
+
+        sql_parser = SQLParser(
+            dialect="databricks",
+            default_schema=self.schema or "default",
+        )
+
+        run_facets = {}
         if self.statement_id:
-
-
-                "Task: %s with statement ID: %s was requested to be cancelled.",
-                self.task_id,
-                self.statement_id,
+            run_facets["externalQuery"] = ExternalQueryRunFacet(
+                externalQueryId=self.statement_id, source=sql_parser.create_namespace(db_info)
             )
-
-
-
+        job_facets = {"sql": SQLJobFacet(query=SQLParser.normalize_sql(self.statement))}
+
+        query = f"{self.statement}"
+        if self.parameters:
+            # Catalog, schema or table can be parameterized, so it's crucial to fill them before parsing
+            for param in self.parameters:
+                query = query.replace(f":{param['name']}", param.get("value") or "null")
+
+        parser_result = None
+        try:
+            # Try performing offline sql parsing, without db access,
+            parser_result = sql_parser.generate_openlineage_metadata_from_sql(
+                sql=query,
+                database_info=db_info,
+                database=None,  # Provided in db_info
+                use_connection=False,  # Prevents DB call for table details, that will fail with API
+                sqlalchemy_engine=None,  # Not needed when use_connection is False
+                hook=None,  # type: ignore[arg-type] # Not needed when use_connection is False
             )
-
-
-
-
-
-
-
-
-
-            error_message = f"SQL Statement execution failed with terminal state: {statement_state} and with the error {error}"
-            raise AirflowException(error_message)
+        except Exception as e:
+            self.log.debug("OpenLineage failed to parse query `%s` with error %s", query, e)
+
+        return OperatorLineage(
+            inputs=parser_result.inputs if parser_result else [],
+            outputs=parser_result.outputs if parser_result else [],
+            job_facets=parser_result.job_facets if parser_result else job_facets,
+            run_facets={**parser_result.run_facets, **run_facets} if parser_result else run_facets,
+        )


 class DatabricksTaskBaseOperator(BaseOperator, ABC):
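The OpenLineage handler added above substitutes named parameters into the statement text before offline parsing, since a parameterized catalog, schema, or table would otherwise be invisible to the SQL parser. A standalone illustration of that substitution loop, using the parameters format of the Statement Execution API (the values here are made up):

    # Mirrors the query.replace(...) loop shown in the diff above.
    statement = "INSERT INTO :target_table SELECT * FROM sales WHERE ds = :ds"
    parameters = [
        {"name": "target_table", "value": "analytics.daily_sales"},
        {"name": "ds", "value": "2025-06-01"},
    ]

    query = f"{statement}"
    for param in parameters:
        # A missing value falls back to the literal "null", matching the operator's behaviour.
        query = query.replace(f":{param['name']}", param.get("value") or "null")

    print(query)
    # INSERT INTO analytics.daily_sales SELECT * FROM sales WHERE ds = 2025-06-01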
{apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/operators/databricks_sql.py

@@ -277,8 +277,13 @@ class DatabricksCopyIntoOperator(BaseOperator):
         self._client_parameters = client_parameters or {}
         if force_copy is not None:
             self._copy_options["force"] = "true" if force_copy else "false"
+        self._sql: str | None = None

     def _get_hook(self) -> DatabricksSqlHook:
+        return self._hook
+
+    @cached_property
+    def _hook(self) -> DatabricksSqlHook:
         return DatabricksSqlHook(
             self.databricks_conn_id,
             http_path=self._http_path,

@@ -354,12 +359,116 @@ FILEFORMAT = {self._file_format}
         return sql.strip()

     def execute(self, context: Context) -> Any:
-
-        self.log.info("Executing: %s",
+        self._sql = self._create_sql_query()
+        self.log.info("Executing: %s", self._sql)
         hook = self._get_hook()
-        hook.run(
+        hook.run(self._sql)

     def on_kill(self) -> None:
         # NB: on_kill isn't required for this operator since query cancelling gets
         # handled in `DatabricksSqlHook.run()` method which is called in `execute()`
         ...
+
+    def _build_input_openlineage_dataset(self) -> tuple[Any, list[Any]]:
+        """Parse file_location to build the OpenLineage input dataset."""
+        from urllib.parse import urlparse
+
+        from airflow.providers.common.compat.openlineage.facet import Dataset, Error
+
+        try:
+            uri = urlparse(self.file_location)
+
+            # Only process schemes we know produce valid OL datasets with current implementation
+            if uri.scheme not in ("s3", "s3a", "s3n", "gs", "abfss", "wasbs"):
+                raise ValueError(f"Unsupported scheme: `{uri.scheme}` in `{self.file_location}`")
+
+            namespace = f"{uri.scheme}://{uri.netloc}"
+            name = uri.path.strip("/")
+            if name in ("", "."):
+                name = "/"
+            return Dataset(namespace=namespace, name=name), []
+        except Exception as e:
+            self.log.debug("Failed to parse file_location: `%s`, error: %s", self.file_location, str(e))
+            extraction_errors = [
+                Error(errorMessage=str(e), stackTrace=None, task=self.file_location, taskNumber=None)
+            ]
+            return None, extraction_errors
+
+    def _build_output_openlineage_dataset(self, namespace: str) -> tuple[Any, list[Any]]:
+        """Build output OpenLineage dataset from table information."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset, Error
+
+        try:
+            table_parts = self.table_name.split(".")
+            if len(table_parts) == 3:  # catalog.schema.table
+                catalog, schema, table = table_parts
+            elif len(table_parts) == 2:  # schema.table
+                catalog = None
+                schema, table = table_parts
+            else:
+                catalog = None
+                schema = None
+                table = self.table_name
+
+            hook = self._get_hook()
+            schema = schema or hook.get_openlineage_default_schema()  # Fallback to default schema
+            catalog = catalog or hook.catalog  # Fallback to default catalog, if provided
+
+            # Combine schema/table with optional catalog for final dataset name
+            fq_name = table
+            if schema:
+                fq_name = f"{schema}.{fq_name}"
+            if catalog:
+                fq_name = f"{catalog}.{fq_name}"
+
+            return Dataset(namespace=namespace, name=fq_name), []
+        except Exception as e:
+            self.log.debug("Failed to construct output dataset: `%s`, error: %s", self.table_name, str(e))
+            extraction_errors = [
+                Error(errorMessage=str(e), stackTrace=None, task=self.table_name, taskNumber=None)
+            ]
+            return None, extraction_errors
+
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we are attaching query id."""
+        from airflow.providers.common.compat.openlineage.facet import (
+            ExternalQueryRunFacet,
+            ExtractionErrorRunFacet,
+            SQLJobFacet,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+        from airflow.providers.openlineage.sqlparser import SQLParser
+
+        if not self._sql:
+            self.log.warning("No SQL query found, returning empty OperatorLineage.")
+            return OperatorLineage()
+
+        hook = self._get_hook()
+        run_facets = {}
+
+        connection = hook.get_connection(self.databricks_conn_id)
+        database_info = hook.get_openlineage_database_info(connection)
+        dbx_namespace = SQLParser.create_namespace(database_info)
+
+        if hook.query_ids:
+            run_facets["externalQuery"] = ExternalQueryRunFacet(
+                externalQueryId=hook.query_ids[0], source=dbx_namespace
+            )
+
+        input_dataset, extraction_errors = self._build_input_openlineage_dataset()
+        output_dataset, output_errors = self._build_output_openlineage_dataset(dbx_namespace)
+        extraction_errors.extend(output_errors)
+
+        if extraction_errors:
+            run_facets["extractionError"] = ExtractionErrorRunFacet(
+                totalTasks=1,
+                failedTasks=len(extraction_errors),
+                errors=extraction_errors,
+            )
+
+        return OperatorLineage(
+            inputs=[input_dataset] if input_dataset else [],
+            outputs=[output_dataset] if output_dataset else [],
+            job_facets={"sql": SQLJobFacet(query=SQLParser.normalize_sql(self._sql))},
+            run_facets=run_facets,
+        )
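For the input side, ``_build_input_openlineage_dataset`` splits the ``file_location`` URI into an OpenLineage namespace (scheme plus bucket/container) and a name (the object path), and records an extraction error for schemes it does not recognise. A quick standalone illustration of that mapping, using the same logic as the method above with made-up locations:

    # Standalone illustration of the URI-to-dataset mapping used by the operator above.
    from urllib.parse import urlparse

    def split_location(file_location: str) -> tuple[str, str]:
        uri = urlparse(file_location)
        if uri.scheme not in ("s3", "s3a", "s3n", "gs", "abfss", "wasbs"):
            raise ValueError(f"Unsupported scheme: `{uri.scheme}` in `{file_location}`")
        namespace = f"{uri.scheme}://{uri.netloc}"
        name = uri.path.strip("/")
        if name in ("", "."):
            name = "/"
        return namespace, name

    print(split_location("s3://my-bucket/landing/orders/"))
    # ('s3://my-bucket', 'landing/orders')
    print(split_location("abfss://container@account.dfs.core.windows.net/raw/events"))
    # ('abfss://container@account.dfs.core.windows.net', 'raw/events')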