apache-airflow-providers-databricks 7.4.0rc1__tar.gz → 7.5.0rc1__tar.gz

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.

Files changed (95)
  1. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/PKG-INFO +17 -15
  2. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/README.rst +13 -11
  3. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/changelog.rst +18 -0
  4. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/index.rst +13 -11
  5. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/sql_statements.rst +46 -0
  6. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/provider.yaml +3 -1
  7. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/pyproject.toml +4 -4
  8. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/__init__.py +1 -1
  9. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/get_provider_info.py +1 -0
  10. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/operators/databricks.py +57 -79
  11. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/operators/databricks_sql.py +112 -3
  12. apache_airflow_providers_databricks-7.5.0rc1/src/airflow/providers/databricks/sensors/databricks.py +162 -0
  13. apache_airflow_providers_databricks-7.5.0rc1/src/airflow/providers/databricks/utils/mixins.py +194 -0
  14. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/databricks/example_databricks_sensors.py +14 -0
  15. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/operators/test_databricks.py +154 -0
  16. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/operators/test_databricks_copy.py +177 -0
  17. apache_airflow_providers_databricks-7.5.0rc1/tests/unit/databricks/sensors/test_databricks.py +208 -0
  18. apache_airflow_providers_databricks-7.5.0rc1/tests/unit/databricks/utils/test_mixins.py +127 -0
  19. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/.latest-doc-only-change.txt +0 -0
  20. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/commits.rst +0 -0
  21. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/conf.py +0 -0
  22. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/connections/databricks.rst +0 -0
  23. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/img/databricks_workflow_task_group_airflow_graph_view.png +0 -0
  24. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/img/workflow_plugin_launch_task.png +0 -0
  25. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/img/workflow_plugin_single_task.png +0 -0
  26. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/img/workflow_run_databricks_graph_view.png +0 -0
  27. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/installing-providers-from-sources.rst +0 -0
  28. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/integration-logos/Databricks.png +0 -0
  29. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/copy_into.rst +0 -0
  30. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/index.rst +0 -0
  31. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/jobs_create.rst +0 -0
  32. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/notebook.rst +0 -0
  33. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/repos_create.rst +0 -0
  34. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/repos_delete.rst +0 -0
  35. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/repos_update.rst +0 -0
  36. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/run_now.rst +0 -0
  37. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/sql.rst +0 -0
  38. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/submit_run.rst +0 -0
  39. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/task.rst +0 -0
  40. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/operators/workflow.rst +0 -0
  41. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/plugins/index.rst +0 -0
  42. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/plugins/workflow.rst +0 -0
  43. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/docs/security.rst +0 -0
  44. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/__init__.py +0 -0
  45. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/__init__.py +0 -0
  46. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/LICENSE +0 -0
  47. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/exceptions.py +0 -0
  48. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/hooks/__init__.py +0 -0
  49. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/hooks/databricks.py +0 -0
  50. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/hooks/databricks_base.py +0 -0
  51. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/hooks/databricks_sql.py +0 -0
  52. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/operators/__init__.py +0 -0
  53. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/operators/databricks_repos.py +0 -0
  54. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/operators/databricks_workflow.py +0 -0
  55. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/plugins/__init__.py +0 -0
  56. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/plugins/databricks_workflow.py +0 -0
  57. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/sensors/__init__.py +0 -0
  58. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/sensors/databricks_partition.py +0 -0
  59. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/sensors/databricks_sql.py +0 -0
  60. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/triggers/__init__.py +0 -0
  61. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/triggers/databricks.py +0 -0
  62. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/utils/__init__.py +0 -0
  63. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/utils/databricks.py +0 -0
  64. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/utils/openlineage.py +0 -0
  65. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/src/airflow/providers/databricks/version_compat.py +0 -0
  66. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/conftest.py +0 -0
  67. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/__init__.py +0 -0
  68. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/databricks/__init__.py +0 -0
  69. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/databricks/example_databricks.py +0 -0
  70. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/databricks/example_databricks_repos.py +0 -0
  71. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/databricks/example_databricks_sql.py +0 -0
  72. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/system/databricks/example_databricks_workflow.py +0 -0
  73. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/__init__.py +0 -0
  74. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/__init__.py +0 -0
  75. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/hooks/__init__.py +0 -0
  76. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/hooks/test_databricks.py +0 -0
  77. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/hooks/test_databricks_azure_workload_identity.py +0 -0
  78. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/hooks/test_databricks_azure_workload_identity_async.py +0 -0
  79. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/hooks/test_databricks_base.py +0 -0
  80. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/hooks/test_databricks_sql.py +0 -0
  81. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/operators/__init__.py +0 -0
  82. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/operators/test_databricks_repos.py +0 -0
  83. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/operators/test_databricks_sql.py +0 -0
  84. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/operators/test_databricks_workflow.py +0 -0
  85. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/plugins/__init__.py +0 -0
  86. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/plugins/test_databricks_workflow.py +0 -0
  87. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/sensors/__init__.py +0 -0
  88. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/sensors/test_databricks_partition.py +0 -0
  89. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/sensors/test_databricks_sql.py +0 -0
  90. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/test_exceptions.py +0 -0
  91. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/triggers/__init__.py +0 -0
  92. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/triggers/test_databricks.py +0 -0
  93. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/utils/__init__.py +0 -0
  94. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/utils/test_databricks.py +0 -0
  95. {apache_airflow_providers_databricks-7.4.0rc1 → apache_airflow_providers_databricks-7.5.0rc1}/tests/unit/databricks/utils/test_openlineage.py +0 -0

--- apache_airflow_providers_databricks-7.4.0rc1/PKG-INFO
+++ apache_airflow_providers_databricks-7.5.0rc1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-databricks
-Version: 7.4.0rc1
+Version: 7.5.0rc1
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -23,7 +23,7 @@ Classifier: Topic :: System :: Monitoring
 Requires-Dist: apache-airflow>=2.10.0rc1
 Requires-Dist: apache-airflow-providers-common-compat>=1.6.0rc1
 Requires-Dist: apache-airflow-providers-common-sql>=1.27.0rc1
-Requires-Dist: requests>=2.31.0,<3
+Requires-Dist: requests>=2.32.0,<3
 Requires-Dist: databricks-sql-connector>=3.0.0
 Requires-Dist: databricks-sqlalchemy>=1.0.2
 Requires-Dist: aiohttp>=3.9.2, <4
@@ -36,8 +36,8 @@ Requires-Dist: apache-airflow-providers-openlineage>=2.3.0rc1 ; extra == "openli
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Requires-Dist: apache-airflow-providers-standard ; extra == "standard"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.4.0/changelog.html
-Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.4.0
+Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html
+Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.5.0
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
@@ -73,7 +73,7 @@ Provides-Extra: standard

 Package ``apache-airflow-providers-databricks``

-Release: ``7.4.0``
+Release: ``7.5.0``


 `Databricks <https://databricks.com/>`__
@@ -86,7 +86,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.4.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/>`_.

 Installation
 ------------
@@ -106,7 +106,7 @@ PIP package Version required
 ``apache-airflow`` ``>=2.10.0``
 ``apache-airflow-providers-common-compat`` ``>=1.6.0``
 ``apache-airflow-providers-common-sql`` ``>=1.27.0``
-``requests`` ``>=2.31.0,<3``
+``requests`` ``>=2.32.0,<3``
 ``databricks-sql-connector`` ``>=3.0.0``
 ``databricks-sqlalchemy`` ``>=1.0.2``
 ``aiohttp`` ``>=3.9.2,<4``
@@ -125,16 +125,18 @@ You can install such cross-provider dependencies when installing from PyPI. For

 .. code-block:: bash

-    pip install apache-airflow-providers-databricks[common.sql]
+    pip install apache-airflow-providers-databricks[common.compat]


-============================================================================================================ ==============
-Dependent package Extra
-============================================================================================================ ==============
-`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
-`apache-airflow-providers-fab <https://airflow.apache.org/docs/apache-airflow-providers-fab>`_ ``fab``
-============================================================================================================ ==============
+================================================================================================================== =================
+Dependent package Extra
+================================================================================================================== =================
+`apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
+`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
+`apache-airflow-providers-fab <https://airflow.apache.org/docs/apache-airflow-providers-fab>`_ ``fab``
+`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
+================================================================================================================== =================

 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.4.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html>`_.

--- apache_airflow_providers_databricks-7.4.0rc1/README.rst
+++ apache_airflow_providers_databricks-7.5.0rc1/README.rst
@@ -23,7 +23,7 @@

 Package ``apache-airflow-providers-databricks``

-Release: ``7.4.0``
+Release: ``7.5.0``


 `Databricks <https://databricks.com/>`__
@@ -36,7 +36,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.

 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.4.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/>`_.

 Installation
 ------------
@@ -56,7 +56,7 @@ PIP package Version required
 ``apache-airflow`` ``>=2.10.0``
 ``apache-airflow-providers-common-compat`` ``>=1.6.0``
 ``apache-airflow-providers-common-sql`` ``>=1.27.0``
-``requests`` ``>=2.31.0,<3``
+``requests`` ``>=2.32.0,<3``
 ``databricks-sql-connector`` ``>=3.0.0``
 ``databricks-sqlalchemy`` ``>=1.0.2``
 ``aiohttp`` ``>=3.9.2,<4``
@@ -75,15 +75,17 @@ You can install such cross-provider dependencies when installing from PyPI. For

 .. code-block:: bash

-    pip install apache-airflow-providers-databricks[common.sql]
+    pip install apache-airflow-providers-databricks[common.compat]


-============================================================================================================ ==============
-Dependent package Extra
-============================================================================================================ ==============
-`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
-`apache-airflow-providers-fab <https://airflow.apache.org/docs/apache-airflow-providers-fab>`_ ``fab``
-============================================================================================================ ==============
+================================================================================================================== =================
+Dependent package Extra
+================================================================================================================== =================
+`apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
+`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
+`apache-airflow-providers-fab <https://airflow.apache.org/docs/apache-airflow-providers-fab>`_ ``fab``
+`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
+================================================================================================================== =================

 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.4.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html>`_.
--- apache_airflow_providers_databricks-7.4.0rc1/docs/changelog.rst
+++ apache_airflow_providers_databricks-7.5.0rc1/docs/changelog.rst
@@ -26,6 +26,24 @@
 Changelog
 ---------

+7.5.0
+.....
+
+Features
+~~~~~~~~
+
+* ``feat: Add OpenLineage support for DatabricksSQLStatementsOperator (#50891)``
+* ``feat: Add OpenLineage support for DatabricksCopyIntoOperator (#50821)``
+* ``Adding 'DatabricksSQLStatementSensor' Sensor with Deferrability (#49516)``
+
+Misc
+~~~~
+
+* ``Bump some provider dependencies for faster resolution (#51727)``
+
+.. Below changes are excluded from the changelog. Move them to
+   appropriate section above if needed. Do not delete the lines(!):
+
 7.4.0
 .....

--- apache_airflow_providers_databricks-7.4.0rc1/docs/index.rst
+++ apache_airflow_providers_databricks-7.5.0rc1/docs/index.rst
@@ -78,7 +78,7 @@ apache-airflow-providers-databricks package
 `Databricks <https://databricks.com/>`__


-Release: 7.4.0
+Release: 7.5.0

 Provider package
 ----------------
@@ -104,7 +104,7 @@ PIP package Version required
 ``apache-airflow`` ``>=2.10.0``
 ``apache-airflow-providers-common-compat`` ``>=1.6.0``
 ``apache-airflow-providers-common-sql`` ``>=1.27.0``
-``requests`` ``>=2.31.0,<3``
+``requests`` ``>=2.32.0,<3``
 ``databricks-sql-connector`` ``>=3.0.0``
 ``databricks-sqlalchemy`` ``>=1.0.2``
 ``aiohttp`` ``>=3.9.2,<4``
@@ -123,15 +123,17 @@ You can install such cross-provider dependencies when installing from PyPI. For

 .. code-block:: bash

-    pip install apache-airflow-providers-databricks[common.sql]
+    pip install apache-airflow-providers-databricks[common.compat]


-============================================================================================================ ==============
-Dependent package Extra
-============================================================================================================ ==============
-`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
-`apache-airflow-providers-fab <https://airflow.apache.org/docs/apache-airflow-providers-fab>`_ ``fab``
-============================================================================================================ ==============
+================================================================================================================== =================
+Dependent package Extra
+================================================================================================================== =================
+`apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
+`apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_ ``common.sql``
+`apache-airflow-providers-fab <https://airflow.apache.org/docs/apache-airflow-providers-fab>`_ ``fab``
+`apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_ ``openlineage``
+================================================================================================================== =================

 Downloading official packages
 -----------------------------
@@ -139,5 +141,5 @@ Downloading official packages
 You can download officially released packages and verify their checksums and signatures from the
 `Official Apache Download site <https://downloads.apache.org/airflow/providers/>`_

-* `The apache-airflow-providers-databricks 7.4.0 sdist package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.4.0.tar.gz>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.4.0.tar.gz.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.4.0.tar.gz.sha512>`__)
-* `The apache-airflow-providers-databricks 7.4.0 wheel package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.4.0-py3-none-any.whl>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.4.0-py3-none-any.whl.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.4.0-py3-none-any.whl.sha512>`__)
+* `The apache-airflow-providers-databricks 7.5.0 sdist package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0.tar.gz>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0.tar.gz.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0.tar.gz.sha512>`__)
+* `The apache-airflow-providers-databricks 7.5.0 wheel package <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0-py3-none-any.whl>`_ (`asc <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0-py3-none-any.whl.asc>`__, `sha512 <https://downloads.apache.org/airflow/providers/apache_airflow_providers_databricks-7.5.0-py3-none-any.whl.sha512>`__)
--- apache_airflow_providers_databricks-7.4.0rc1/docs/operators/sql_statements.rst
+++ apache_airflow_providers_databricks-7.5.0rc1/docs/operators/sql_statements.rst
@@ -55,3 +55,49 @@ An example usage of the ``DatabricksSQLStatementsOperator`` is as follows:
     :language: python
     :start-after: [START howto_operator_sql_statements]
     :end-before: [END howto_operator_sql_statements]
+
+
+.. _howto/sensor:DatabricksSQLStatementsSensor:
+
+DatabricksSQLStatementsSensor
+===============================
+
+Use the :class:`~airflow.providers.databricks.sensor.databricks.DatabricksSQLStatementsSensor` to either submit a
+Databricks SQL Statement to Databricks using the
+`Databricks SQL Statement Execution API <https://docs.databricks.com/api/workspace/statementexecution>`_, or pass
+a Statement ID to the Sensor and await for the query to terminate execution.
+
+
+Using the Sensor
+------------------
+
+The ``DatabricksSQLStatementsSensor`` does one of two things. The Sensor can submit SQL statements to Databricks using
+the `/api/2.0/sql/statements/ <https://docs.databricks.com/api/workspace/statementexecution/executestatement>`_
+endpoint. However, the Sensor can also take the Statement ID of an already-submitted SQL Statement and handle the
+response to that execution.
+
+It supports configurable execution parameters such as warehouse selection, catalog, schema, and parameterized queries.
+The operator can either synchronously poll for query completion or run in a deferrable mode for improved efficiency.
+
+The only required parameters for using the Sensor are:
+
+* One of ``statement`` or ``statement_id`` - The SQL statement to execute. The statement can optionally be
+  parameterized, see parameters.
+* ``warehouse_id`` - Warehouse upon which to execute a statement.
+
+All other parameters are optional and described in the documentation for ``DatabricksSQLStatementsSensor`` including
+but not limited to:
+
+* ``catalog``
+* ``schema``
+* ``parameters``
+
+Examples
+--------
+
+An example usage of the ``DatabricksSQLStatementsSensor`` is as follows:
+
+.. exampleinclude:: /../../databricks/tests/system/databricks/example_databricks_sensors.py
+    :language: python
+    :start-after: [START howto_sensor_databricks_sql_statement]
+    :end-before: [END howto_sensor_databricks_sql_statement]
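
For orientation, the following is a minimal sketch (not part of the released package) of a DAG using the sensor documented above. Only ``statement``, ``warehouse_id``, ``catalog`` and ``schema`` come from the docs in this diff, and the import path follows the module added to provider.yaml; ``databricks_conn_id`` and ``deferrable`` are assumed keyword arguments mirroring the sibling operator and are not confirmed here.

# Minimal sketch only, not taken from the released package. The connection id
# and the deferrable flag below are assumptions; check the sensor's signature.
from __future__ import annotations

import datetime

from airflow import DAG
from airflow.providers.databricks.sensors.databricks import DatabricksSQLStatementsSensor

with DAG(
    dag_id="example_databricks_sql_statement_sensor",
    start_date=datetime.datetime(2025, 1, 1),
    schedule=None,
    catchup=False,
):
    wait_for_statement = DatabricksSQLStatementsSensor(
        task_id="wait_for_statement",
        databricks_conn_id="databricks_default",  # assumed default connection id
        statement="SELECT COUNT(*) FROM nyctaxi.trips",
        warehouse_id="<YOUR_WAREHOUSE_ID>",
        catalog="samples",  # optional, per the docs above
        schema="nyctaxi",  # optional, per the docs above
        deferrable=True,  # deferrable mode is described above; kwarg name assumed
    )
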
--- apache_airflow_providers_databricks-7.4.0rc1/provider.yaml
+++ apache_airflow_providers_databricks-7.5.0rc1/provider.yaml
@@ -22,12 +22,13 @@ description: |
     `Databricks <https://databricks.com/>`__

 state: ready
-source-date-epoch: 1747132432
+source-date-epoch: 1749896357
 # Note that those versions are maintained by release manager - do not update them manually
 # with the exception of case where other provider in sources has >= new provider version.
 # In such case adding >= NEW_VERSION and bumping to NEW_VERSION in a provider have
 # to be done in the same PR
 versions:
+  - 7.5.0
   - 7.4.0
   - 7.3.2
   - 7.3.1
@@ -143,6 +144,7 @@ triggers:
 sensors:
   - integration-name: Databricks
     python-modules:
+      - airflow.providers.databricks.sensors.databricks
      - airflow.providers.databricks.sensors.databricks_sql
      - airflow.providers.databricks.sensors.databricks_partition

--- apache_airflow_providers_databricks-7.4.0rc1/pyproject.toml
+++ apache_airflow_providers_databricks-7.5.0rc1/pyproject.toml
@@ -25,7 +25,7 @@ build-backend = "flit_core.buildapi"

 [project]
 name = "apache-airflow-providers-databricks"
-version = "7.4.0rc1"
+version = "7.5.0rc1"
 description = "Provider package apache-airflow-providers-databricks for Apache Airflow"
 readme = "README.rst"
 authors = [
@@ -60,7 +60,7 @@ dependencies = [
     "apache-airflow>=2.10.0rc1",
     "apache-airflow-providers-common-compat>=1.6.0rc1",
     "apache-airflow-providers-common-sql>=1.27.0rc1",
-    "requests>=2.31.0,<3",
+    "requests>=2.32.0,<3",
     "databricks-sql-connector>=3.0.0",
     "databricks-sqlalchemy>=1.0.2",
     "aiohttp>=3.9.2, <4",
@@ -130,8 +130,8 @@ apache-airflow-providers-common-sql = {workspace = true}
 apache-airflow-providers-standard = {workspace = true}

 [project.urls]
-"Documentation" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.4.0"
-"Changelog" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.4.0/changelog.html"
+"Documentation" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.5.0"
+"Changelog" = "https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.5.0/changelog.html"
 "Bug Tracker" = "https://github.com/apache/airflow/issues"
 "Source Code" = "https://github.com/apache/airflow"
 "Slack Chat" = "https://s.apache.org/airflow-slack"
--- apache_airflow_providers_databricks-7.4.0rc1/src/airflow/providers/databricks/__init__.py
+++ apache_airflow_providers_databricks-7.5.0rc1/src/airflow/providers/databricks/__init__.py
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version

 __all__ = ["__version__"]

-__version__ = "7.4.0"
+__version__ = "7.5.0"

 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.10.0"
--- apache_airflow_providers_databricks-7.4.0rc1/src/airflow/providers/databricks/get_provider_info.py
+++ apache_airflow_providers_databricks-7.5.0rc1/src/airflow/providers/databricks/get_provider_info.py
@@ -107,6 +107,7 @@ def get_provider_info():
             {
                 "integration-name": "Databricks",
                 "python-modules": [
+                    "airflow.providers.databricks.sensors.databricks",
                     "airflow.providers.databricks.sensors.databricks_sql",
                     "airflow.providers.databricks.sensors.databricks_partition",
                 ],
--- apache_airflow_providers_databricks-7.4.0rc1/src/airflow/providers/databricks/operators/databricks.py
+++ apache_airflow_providers_databricks-7.5.0rc1/src/airflow/providers/databricks/operators/databricks.py
@@ -34,7 +34,6 @@ from airflow.providers.databricks.hooks.databricks import (
     DatabricksHook,
     RunLifeCycleState,
     RunState,
-    SQLStatementState,
 )
 from airflow.providers.databricks.operators.databricks_workflow import (
     DatabricksWorkflowTaskGroup,
@@ -46,13 +45,14 @@ from airflow.providers.databricks.plugins.databricks_workflow import (
 )
 from airflow.providers.databricks.triggers.databricks import (
     DatabricksExecutionTrigger,
-    DatabricksSQLStatementExecutionTrigger,
 )
 from airflow.providers.databricks.utils.databricks import normalise_json_content, validate_trigger_event
+from airflow.providers.databricks.utils.mixins import DatabricksSQLStatementsMixin
 from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS

 if TYPE_CHECKING:
     from airflow.models.taskinstancekey import TaskInstanceKey
+    from airflow.providers.openlineage.extractors import OperatorLineage
     from airflow.utils.context import Context
     from airflow.utils.task_group import TaskGroup

@@ -978,7 +978,7 @@ class DatabricksRunNowOperator(BaseOperator):
             self.log.error("Error: Task: %s with invalid run_id was requested to be cancelled.", self.task_id)


-class DatabricksSQLStatementsOperator(BaseOperator):
+class DatabricksSQLStatementsOperator(DatabricksSQLStatementsMixin, BaseOperator):
     """
     Submits a Databricks SQL Statement to Databricks using the api/2.0/sql/statements/ API endpoint.

@@ -1073,59 +1073,6 @@ class DatabricksSQLStatementsOperator(BaseOperator):
             caller=caller,
         )

-    def _handle_operator_execution(self) -> None:
-        end_time = time.time() + self.timeout
-        while end_time > time.time():
-            statement_state = self._hook.get_sql_statement_state(self.statement_id)
-            if statement_state.is_terminal:
-                if statement_state.is_successful:
-                    self.log.info("%s completed successfully.", self.task_id)
-                    return
-                error_message = (
-                    f"{self.task_id} failed with terminal state: {statement_state.state} "
-                    f"and with the error code {statement_state.error_code} "
-                    f"and error message {statement_state.error_message}"
-                )
-                raise AirflowException(error_message)
-
-            self.log.info("%s in run state: %s", self.task_id, statement_state.state)
-            self.log.info("Sleeping for %s seconds.", self.polling_period_seconds)
-            time.sleep(self.polling_period_seconds)
-
-        self._hook.cancel_sql_statement(self.statement_id)
-        raise AirflowException(
-            f"{self.task_id} timed out after {self.timeout} seconds with state: {statement_state.state}",
-        )
-
-    def _handle_deferrable_operator_execution(self) -> None:
-        statement_state = self._hook.get_sql_statement_state(self.statement_id)
-        end_time = time.time() + self.timeout
-        if not statement_state.is_terminal:
-            if not self.statement_id:
-                raise AirflowException("Failed to retrieve statement_id after submitting SQL statement.")
-            self.defer(
-                trigger=DatabricksSQLStatementExecutionTrigger(
-                    statement_id=self.statement_id,
-                    databricks_conn_id=self.databricks_conn_id,
-                    end_time=end_time,
-                    polling_period_seconds=self.polling_period_seconds,
-                    retry_limit=self.databricks_retry_limit,
-                    retry_delay=self.databricks_retry_delay,
-                    retry_args=self.databricks_retry_args,
-                ),
-                method_name=DEFER_METHOD_NAME,
-            )
-        else:
-            if statement_state.is_successful:
-                self.log.info("%s completed successfully.", self.task_id)
-            else:
-                error_message = (
-                    f"{self.task_id} failed with terminal state: {statement_state.state} "
-                    f"and with the error code {statement_state.error_code} "
-                    f"and error message {statement_state.error_message}"
-                )
-                raise AirflowException(error_message)
-
     def execute(self, context: Context):
         json = {
             "statement": self.statement,
@@ -1146,34 +1093,65 @@ class DatabricksSQLStatementsOperator(BaseOperator):
         if not self.wait_for_termination:
             return
         if self.deferrable:
-            self._handle_deferrable_operator_execution()
+            self._handle_deferrable_execution(defer_method_name=DEFER_METHOD_NAME)  # type: ignore[misc]
         else:
-            self._handle_operator_execution()
+            self._handle_execution()  # type: ignore[misc]

-    def on_kill(self):
+    def get_openlineage_facets_on_complete(self, _) -> OperatorLineage:
+        """Implement _on_complete because we use statement_id."""
+        from airflow.providers.common.compat.openlineage.facet import (
+            ExternalQueryRunFacet,
+            SQLJobFacet,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+        from airflow.providers.openlineage.sqlparser import DatabaseInfo, SQLParser
+
+        db_info = DatabaseInfo(
+            scheme="databricks",
+            authority=self._hook.host,
+            database=self.catalog,
+            is_uppercase_names=False,
+            # Other args will not be used as we'll not query DB for details, we only do sql parsing.
+        )
+
+        sql_parser = SQLParser(
+            dialect="databricks",
+            default_schema=self.schema or "default",
+        )
+
+        run_facets = {}
         if self.statement_id:
-            self._hook.cancel_sql_statement(self.statement_id)
-            self.log.info(
-                "Task: %s with statement ID: %s was requested to be cancelled.",
-                self.task_id,
-                self.statement_id,
+            run_facets["externalQuery"] = ExternalQueryRunFacet(
+                externalQueryId=self.statement_id, source=sql_parser.create_namespace(db_info)
             )
-        else:
-            self.log.error(
-                "Error: Task: %s with invalid statement_id was requested to be cancelled.", self.task_id
+        job_facets = {"sql": SQLJobFacet(query=SQLParser.normalize_sql(self.statement))}
+
+        query = f"{self.statement}"
+        if self.parameters:
+            # Catalog, schema or table can be parameterized, so it's crucial to fill them before parsing
+            for param in self.parameters:
+                query = query.replace(f":{param['name']}", param.get("value") or "null")
+
+        parser_result = None
+        try:
+            # Try performing offline sql parsing, without db access,
+            parser_result = sql_parser.generate_openlineage_metadata_from_sql(
+                sql=query,
+                database_info=db_info,
+                database=None,  # Provided in db_info
+                use_connection=False,  # Prevents DB call for table details, that will fail with API
+                sqlalchemy_engine=None,  # Not needed when use_connection is False
+                hook=None,  # type: ignore[arg-type]  # Not needed when use_connection is False
             )
-
-    def execute_complete(self, context: dict | None, event: dict):
-        statement_state = SQLStatementState.from_json(event["state"])
-        error = event["error"]
-        statement_id = event["statement_id"]
-
-        if statement_state.is_successful:
-            self.log.info("SQL Statement with ID %s completed successfully.", statement_id)
-            return
-
-        error_message = f"SQL Statement execution failed with terminal state: {statement_state} and with the error {error}"
-        raise AirflowException(error_message)
+        except Exception as e:
+            self.log.debug("OpenLineage failed to parse query `%s` with error %s", query, e)
+
+        return OperatorLineage(
+            inputs=parser_result.inputs if parser_result else [],
+            outputs=parser_result.outputs if parser_result else [],
+            job_facets=parser_result.job_facets if parser_result else job_facets,
+            run_facets={**parser_result.run_facets, **run_facets} if parser_result else run_facets,
        )


 class DatabricksTaskBaseOperator(BaseOperator, ABC):
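
As a side note on the OpenLineage handling added above: before parsing, a parameterized statement is rendered by plain string replacement. A standalone illustration of that step follows; the statement and parameter values are invented for the example and are not taken from the package.

# Standalone illustration of the substitution loop used above before SQL parsing.
# The statement and parameter values here are invented for the example.
statement = "SELECT * FROM :catalog.:schema.trips WHERE pickup_date > :cutoff"
parameters = [
    {"name": "catalog", "value": "samples"},
    {"name": "schema", "value": "nyctaxi"},
    {"name": "cutoff", "value": None},
]

query = f"{statement}"
for param in parameters:
    # A missing value is rendered as SQL "null", mirroring the operator code above.
    query = query.replace(f":{param['name']}", param.get("value") or "null")

print(query)
# SELECT * FROM samples.nyctaxi.trips WHERE pickup_date > null
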
--- apache_airflow_providers_databricks-7.4.0rc1/src/airflow/providers/databricks/operators/databricks_sql.py
+++ apache_airflow_providers_databricks-7.5.0rc1/src/airflow/providers/databricks/operators/databricks_sql.py
@@ -277,8 +277,13 @@ class DatabricksCopyIntoOperator(BaseOperator):
         self._client_parameters = client_parameters or {}
         if force_copy is not None:
             self._copy_options["force"] = "true" if force_copy else "false"
+        self._sql: str | None = None

     def _get_hook(self) -> DatabricksSqlHook:
+        return self._hook
+
+    @cached_property
+    def _hook(self) -> DatabricksSqlHook:
         return DatabricksSqlHook(
             self.databricks_conn_id,
             http_path=self._http_path,
@@ -354,12 +359,116 @@ FILEFORMAT = {self._file_format}
         return sql.strip()

     def execute(self, context: Context) -> Any:
-        sql = self._create_sql_query()
-        self.log.info("Executing: %s", sql)
+        self._sql = self._create_sql_query()
+        self.log.info("Executing: %s", self._sql)
         hook = self._get_hook()
-        hook.run(sql)
+        hook.run(self._sql)

     def on_kill(self) -> None:
         # NB: on_kill isn't required for this operator since query cancelling gets
         # handled in `DatabricksSqlHook.run()` method which is called in `execute()`
         ...
+
+    def _build_input_openlineage_dataset(self) -> tuple[Any, list[Any]]:
+        """Parse file_location to build the OpenLineage input dataset."""
+        from urllib.parse import urlparse
+
+        from airflow.providers.common.compat.openlineage.facet import Dataset, Error
+
+        try:
+            uri = urlparse(self.file_location)
+
+            # Only process schemes we know produce valid OL datasets with current implementation
+            if uri.scheme not in ("s3", "s3a", "s3n", "gs", "abfss", "wasbs"):
+                raise ValueError(f"Unsupported scheme: `{uri.scheme}` in `{self.file_location}`")
+
+            namespace = f"{uri.scheme}://{uri.netloc}"
+            name = uri.path.strip("/")
+            if name in ("", "."):
+                name = "/"
+            return Dataset(namespace=namespace, name=name), []
+        except Exception as e:
+            self.log.debug("Failed to parse file_location: `%s`, error: %s", self.file_location, str(e))
+            extraction_errors = [
+                Error(errorMessage=str(e), stackTrace=None, task=self.file_location, taskNumber=None)
+            ]
+            return None, extraction_errors
+
+    def _build_output_openlineage_dataset(self, namespace: str) -> tuple[Any, list[Any]]:
+        """Build output OpenLineage dataset from table information."""
+        from airflow.providers.common.compat.openlineage.facet import Dataset, Error
+
+        try:
+            table_parts = self.table_name.split(".")
+            if len(table_parts) == 3:  # catalog.schema.table
+                catalog, schema, table = table_parts
+            elif len(table_parts) == 2:  # schema.table
+                catalog = None
+                schema, table = table_parts
+            else:
+                catalog = None
+                schema = None
+                table = self.table_name
+
+            hook = self._get_hook()
+            schema = schema or hook.get_openlineage_default_schema()  # Fallback to default schema
+            catalog = catalog or hook.catalog  # Fallback to default catalog, if provided
+
+            # Combine schema/table with optional catalog for final dataset name
+            fq_name = table
+            if schema:
+                fq_name = f"{schema}.{fq_name}"
+            if catalog:
+                fq_name = f"{catalog}.{fq_name}"
+
+            return Dataset(namespace=namespace, name=fq_name), []
+        except Exception as e:
+            self.log.debug("Failed to construct output dataset: `%s`, error: %s", self.table_name, str(e))
+            extraction_errors = [
+                Error(errorMessage=str(e), stackTrace=None, task=self.table_name, taskNumber=None)
+            ]
+            return None, extraction_errors
+
+    def get_openlineage_facets_on_complete(self, _):
+        """Implement _on_complete as we are attaching query id."""
+        from airflow.providers.common.compat.openlineage.facet import (
+            ExternalQueryRunFacet,
+            ExtractionErrorRunFacet,
+            SQLJobFacet,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+        from airflow.providers.openlineage.sqlparser import SQLParser
+
+        if not self._sql:
+            self.log.warning("No SQL query found, returning empty OperatorLineage.")
+            return OperatorLineage()
+
+        hook = self._get_hook()
+        run_facets = {}
+
+        connection = hook.get_connection(self.databricks_conn_id)
+        database_info = hook.get_openlineage_database_info(connection)
+        dbx_namespace = SQLParser.create_namespace(database_info)
+
+        if hook.query_ids:
+            run_facets["externalQuery"] = ExternalQueryRunFacet(
+                externalQueryId=hook.query_ids[0], source=dbx_namespace
+            )
+
+        input_dataset, extraction_errors = self._build_input_openlineage_dataset()
+        output_dataset, output_errors = self._build_output_openlineage_dataset(dbx_namespace)
+        extraction_errors.extend(output_errors)
+
+        if extraction_errors:
+            run_facets["extractionError"] = ExtractionErrorRunFacet(
+                totalTasks=1,
+                failedTasks=len(extraction_errors),
+                errors=extraction_errors,
+            )
+
+        return OperatorLineage(
+            inputs=[input_dataset] if input_dataset else [],
+            outputs=[output_dataset] if output_dataset else [],
+            job_facets={"sql": SQLJobFacet(query=SQLParser.normalize_sql(self._sql))},
+            run_facets=run_facets,
+        )
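
To make the input-dataset naming above concrete, here is a standalone sketch of the same urlparse-based derivation used by _build_input_openlineage_dataset. The helper name and example file locations are illustrative only and are not part of the provider.

# Standalone sketch mirroring the dataset-name derivation above; helper name
# and example locations are made up for illustration.
from urllib.parse import urlparse

SUPPORTED_SCHEMES = ("s3", "s3a", "s3n", "gs", "abfss", "wasbs")


def derive_openlineage_input(file_location: str) -> tuple[str, str]:
    uri = urlparse(file_location)
    if uri.scheme not in SUPPORTED_SCHEMES:
        raise ValueError(f"Unsupported scheme: `{uri.scheme}` in `{file_location}`")
    namespace = f"{uri.scheme}://{uri.netloc}"
    name = uri.path.strip("/")
    if name in ("", "."):
        name = "/"
    return namespace, name


print(derive_openlineage_input("s3://my-bucket/raw/trips/"))
# ('s3://my-bucket', 'raw/trips')
print(derive_openlineage_input("abfss://container@myaccount.dfs.core.windows.net/landing"))
# ('abfss://container@myaccount.dfs.core.windows.net', 'landing')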