apache-airflow-providers-databricks 7.8.0rc1__py3-none-any.whl → 7.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. airflow/providers/databricks/__init__.py +1 -1
  2. airflow/providers/databricks/exceptions.py +1 -1
  3. airflow/providers/databricks/hooks/databricks.py +30 -30
  4. airflow/providers/databricks/hooks/databricks_base.py +1 -1
  5. airflow/providers/databricks/hooks/databricks_sql.py +10 -2
  6. airflow/providers/databricks/operators/databricks.py +8 -10
  7. airflow/providers/databricks/operators/databricks_repos.py +1 -2
  8. airflow/providers/databricks/operators/databricks_sql.py +164 -30
  9. airflow/providers/databricks/operators/databricks_workflow.py +1 -2
  10. airflow/providers/databricks/plugins/databricks_workflow.py +32 -12
  11. airflow/providers/databricks/sensors/databricks.py +1 -3
  12. airflow/providers/databricks/sensors/databricks_partition.py +1 -2
  13. airflow/providers/databricks/sensors/databricks_sql.py +1 -2
  14. airflow/providers/databricks/utils/databricks.py +1 -1
  15. airflow/providers/databricks/utils/mixins.py +3 -7
  16. airflow/providers/databricks/utils/openlineage.py +25 -63
  17. {apache_airflow_providers_databricks-7.8.0rc1.dist-info → apache_airflow_providers_databricks-7.9.0.dist-info}/METADATA +23 -14
  18. apache_airflow_providers_databricks-7.9.0.dist-info/RECORD +31 -0
  19. {apache_airflow_providers_databricks-7.8.0rc1.dist-info → apache_airflow_providers_databricks-7.9.0.dist-info}/licenses/NOTICE +1 -1
  20. apache_airflow_providers_databricks-7.8.0rc1.dist-info/RECORD +0 -31
  21. {apache_airflow_providers_databricks-7.8.0rc1.dist-info → apache_airflow_providers_databricks-7.9.0.dist-info}/WHEEL +0 -0
  22. {apache_airflow_providers_databricks-7.8.0rc1.dist-info → apache_airflow_providers_databricks-7.9.0.dist-info}/entry_points.txt +0 -0
  23. {apache_airflow_providers_databricks-7.8.0rc1.dist-info → apache_airflow_providers_databricks-7.9.0.dist-info}/licenses/LICENSE +0 -0
@@ -29,7 +29,7 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "7.8.0"
+__version__ = "7.9.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
     "2.11.0"
@@ -21,7 +21,7 @@
 
 from __future__ import annotations
 
-from airflow.exceptions import AirflowException
+from airflow.providers.common.compat.sdk import AirflowException
 
 
 class DatabricksSqlExecutionError(AirflowException):
@@ -20,9 +20,9 @@ Databricks hook.
 
 This hook enable the submitting and running of jobs to the Databricks platform. Internally the
 operators talk to the
-``api/2.1/jobs/run-now``
+``api/2.2/jobs/run-now``
 `endpoint <https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow>_`
-or the ``api/2.1/jobs/runs/submit``
+or the ``api/2.2/jobs/runs/submit``
 `endpoint <https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsSubmit>`_.
 """
 
@@ -34,25 +34,25 @@ from typing import Any
 
 from requests import exceptions as requests_exceptions
 
-from airflow.exceptions import AirflowException
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook
 
-GET_CLUSTER_ENDPOINT = ("GET", "2.0/clusters/get")
-RESTART_CLUSTER_ENDPOINT = ("POST", "2.0/clusters/restart")
-START_CLUSTER_ENDPOINT = ("POST", "2.0/clusters/start")
-TERMINATE_CLUSTER_ENDPOINT = ("POST", "2.0/clusters/delete")
-
-CREATE_ENDPOINT = ("POST", "2.1/jobs/create")
-RESET_ENDPOINT = ("POST", "2.1/jobs/reset")
-UPDATE_ENDPOINT = ("POST", "2.1/jobs/update")
-RUN_NOW_ENDPOINT = ("POST", "2.1/jobs/run-now")
-SUBMIT_RUN_ENDPOINT = ("POST", "2.1/jobs/runs/submit")
-GET_RUN_ENDPOINT = ("GET", "2.1/jobs/runs/get")
-CANCEL_RUN_ENDPOINT = ("POST", "2.1/jobs/runs/cancel")
-DELETE_RUN_ENDPOINT = ("POST", "2.1/jobs/runs/delete")
-REPAIR_RUN_ENDPOINT = ("POST", "2.1/jobs/runs/repair")
-OUTPUT_RUNS_JOB_ENDPOINT = ("GET", "2.1/jobs/runs/get-output")
-CANCEL_ALL_RUNS_ENDPOINT = ("POST", "2.1/jobs/runs/cancel-all")
+GET_CLUSTER_ENDPOINT = ("GET", "2.1/clusters/get")
+RESTART_CLUSTER_ENDPOINT = ("POST", "2.1/clusters/restart")
+START_CLUSTER_ENDPOINT = ("POST", "2.1/clusters/start")
+TERMINATE_CLUSTER_ENDPOINT = ("POST", "2.1/clusters/delete")
+
+CREATE_ENDPOINT = ("POST", "2.2/jobs/create")
+RESET_ENDPOINT = ("POST", "2.2/jobs/reset")
+UPDATE_ENDPOINT = ("POST", "2.2/jobs/update")
+RUN_NOW_ENDPOINT = ("POST", "2.2/jobs/run-now")
+SUBMIT_RUN_ENDPOINT = ("POST", "2.2/jobs/runs/submit")
+GET_RUN_ENDPOINT = ("GET", "2.2/jobs/runs/get")
+CANCEL_RUN_ENDPOINT = ("POST", "2.2/jobs/runs/cancel")
+DELETE_RUN_ENDPOINT = ("POST", "2.2/jobs/runs/delete")
+REPAIR_RUN_ENDPOINT = ("POST", "2.2/jobs/runs/repair")
+OUTPUT_RUNS_JOB_ENDPOINT = ("GET", "2.2/jobs/runs/get-output")
+CANCEL_ALL_RUNS_ENDPOINT = ("POST", "2.2/jobs/runs/cancel-all")
 
 INSTALL_LIBS_ENDPOINT = ("POST", "2.0/libraries/install")
 UNINSTALL_LIBS_ENDPOINT = ("POST", "2.0/libraries/uninstall")
@@ -60,13 +60,13 @@ UPDATE_REPO_ENDPOINT = ("PATCH", "2.0/repos/")
 DELETE_REPO_ENDPOINT = ("DELETE", "2.0/repos/")
 CREATE_REPO_ENDPOINT = ("POST", "2.0/repos")
 
-LIST_JOBS_ENDPOINT = ("GET", "2.1/jobs/list")
+LIST_JOBS_ENDPOINT = ("GET", "2.2/jobs/list")
 LIST_PIPELINES_ENDPOINT = ("GET", "2.0/pipelines")
-LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "2.0/sql/endpoints")
+LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "2.0/sql/warehouses")
 
 WORKSPACE_GET_STATUS_ENDPOINT = ("GET", "2.0/workspace/get-status")
 
-SPARK_VERSIONS_ENDPOINT = ("GET", "2.0/clusters/spark-versions")
+SPARK_VERSIONS_ENDPOINT = ("GET", "2.1/clusters/spark-versions")
 SQL_STATEMENTS_ENDPOINT = "2.0/sql/statements"
 
 
@@ -293,7 +293,7 @@ class DatabricksHook(BaseDatabricksHook):
 
     def create_job(self, json: dict) -> int:
         """
-        Call the ``api/2.1/jobs/create`` endpoint.
+        Call the ``api/2.2/jobs/create`` endpoint.
 
         :param json: The data used in the body of the request to the ``create`` endpoint.
         :return: the job_id as an int
@@ -303,7 +303,7 @@ class DatabricksHook(BaseDatabricksHook):
 
     def reset_job(self, job_id: str, json: dict) -> None:
         """
-        Call the ``api/2.1/jobs/reset`` endpoint.
+        Call the ``api/2.2/jobs/reset`` endpoint.
 
         :param json: The data used in the new_settings of the request to the ``reset`` endpoint.
         """
@@ -321,7 +321,7 @@ class DatabricksHook(BaseDatabricksHook):
 
     def update_job(self, job_id: str, json: dict) -> None:
         """
-        Call the ``api/2.1/jobs/update`` endpoint.
+        Call the ``api/2.2/jobs/update`` endpoint.
 
         :param job_id: The id of the job to update.
         :param json: The data used in the new_settings of the request to the ``update`` endpoint.
@@ -330,7 +330,7 @@ class DatabricksHook(BaseDatabricksHook):
 
     def run_now(self, json: dict) -> int:
         """
-        Call the ``api/2.1/jobs/run-now`` endpoint.
+        Call the ``api/2.2/jobs/run-now`` endpoint.
 
         :param json: The data used in the body of the request to the ``run-now`` endpoint.
         :return: the run_id as an int
@@ -340,7 +340,7 @@ class DatabricksHook(BaseDatabricksHook):
 
     def submit_run(self, json: dict) -> int:
         """
-        Call the ``api/2.1/jobs/runs/submit`` endpoint.
+        Call the ``api/2.2/jobs/runs/submit`` endpoint.
 
         :param json: The data used in the body of the request to the ``submit`` endpoint.
         :return: the run_id as an int
@@ -385,9 +385,9 @@ class DatabricksHook(BaseDatabricksHook):
                 all_jobs += [j for j in jobs if j["settings"]["name"] == job_name]
             else:
                 all_jobs += jobs
-            has_more = response.get("has_more", False)
-            if has_more:
-                page_token = response.get("next_page_token", "")
+            # issue-59189: API v2.2 removes "has_more" field
+            page_token = response.get("next_page_token", "")
+            has_more = bool(page_token)
 
         return all_jobs
 
@@ -49,7 +49,7 @@ from tenacity import (
 )
 
 from airflow import __version__
-from airflow.exceptions import AirflowException, AirflowOptionalProviderFeatureException
+from airflow.providers.common.compat.sdk import AirflowException, AirflowOptionalProviderFeatureException
 from airflow.providers_manager import ProvidersManager
 
 try:
@@ -32,9 +32,8 @@ from typing import (
 
 from databricks import sql
 from databricks.sql.types import Row
-from sqlalchemy.engine import URL
 
-from airflow.exceptions import AirflowException
+from airflow.providers.common.compat.sdk import AirflowException, AirflowOptionalProviderFeatureException
 from airflow.providers.common.sql.hooks.handlers import return_single_query_results
 from airflow.providers.common.sql.hooks.sql import DbApiHook
 from airflow.providers.databricks.exceptions import DatabricksSqlExecutionError, DatabricksSqlExecutionTimeout
@@ -43,6 +42,7 @@ from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook
 
 if TYPE_CHECKING:
     from databricks.sql.client import Connection
+    from sqlalchemy.engine import URL
 
     from airflow.models.connection import Connection as AirflowConnection
     from airflow.providers.openlineage.extractors import OperatorLineage
@@ -179,6 +179,14 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
 
         :return: the extracted sqlalchemy.engine.URL object.
         """
+        try:
+            from sqlalchemy.engine import URL
+        except ImportError:
+            raise AirflowOptionalProviderFeatureException(
+                "sqlalchemy is required to generate the connection URL. "
+                "Install it with: pip install 'apache-airflow-providers-databricks[sqlalchemy]'"
+            )
+
         url_query = {
             "http_path": self._http_path,
             "catalog": self.catalog,
@@ -26,9 +26,7 @@ from collections.abc import Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any
 
-from airflow.configuration import conf
-from airflow.exceptions import AirflowException
-from airflow.providers.common.compat.sdk import BaseOperator, BaseOperatorLink, XCom
+from airflow.providers.common.compat.sdk import AirflowException, BaseOperator, BaseOperatorLink, XCom, conf
 from airflow.providers.databricks.hooks.databricks import (
     DatabricksHook,
     RunLifeCycleState,
@@ -55,7 +53,7 @@ from airflow.providers.databricks.utils.mixins import DatabricksSQLStatementsMixin
 from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS
 
 if TYPE_CHECKING:
-    from airflow.models.taskinstancekey import TaskInstanceKey
+    from airflow.providers.common.compat.sdk import TaskInstanceKey
     from airflow.providers.databricks.operators.databricks_workflow import (
         DatabricksWorkflowTaskGroup,
     )
@@ -261,7 +259,7 @@ class DatabricksCreateJobsOperator(BaseOperator):
         https://docs.databricks.com/api/workspace/jobs/reset
 
     :param json: A JSON object containing API parameters which will be passed
-        directly to the ``api/2.1/jobs/create`` endpoint. The other named parameters
+        directly to the ``api/2.2/jobs/create`` endpoint. The other named parameters
         (i.e. ``name``, ``tags``, ``tasks``, etc.) to this operator will
         be merged with this json dictionary if they are provided.
         If there are conflicts during the merge, the named parameters will
@@ -392,7 +390,7 @@ class DatabricksCreateJobsOperator(BaseOperator):
 
 
 class DatabricksSubmitRunOperator(BaseOperator):
     """
-    Submits a Spark job run to Databricks using the api/2.1/jobs/runs/submit API endpoint.
+    Submits a Spark job run to Databricks using the api/2.2/jobs/runs/submit API endpoint.
 
     See: https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsSubmit
 
@@ -407,7 +405,7 @@ class DatabricksSubmitRunOperator(BaseOperator):
     .. seealso::
         https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunsSubmit
     :param json: A JSON object containing API parameters which will be passed
-        directly to the ``api/2.1/jobs/runs/submit`` endpoint. The other named parameters
+        directly to the ``api/2.2/jobs/runs/submit`` endpoint. The other named parameters
         (i.e. ``spark_jar_task``, ``notebook_task``..) to this operator will
         be merged with this json dictionary if they are provided.
         If there are conflicts during the merge, the named parameters will
@@ -645,14 +643,14 @@ class DatabricksSubmitRunOperator(BaseOperator):
 
 class DatabricksRunNowOperator(BaseOperator):
     """
-    Runs an existing Spark job run to Databricks using the api/2.1/jobs/run-now API endpoint.
+    Runs an existing Spark job run to Databricks using the api/2.2/jobs/run-now API endpoint.
 
     See: https://docs.databricks.com/dev-tools/api/latest/jobs.html#operation/JobsRunNow
 
     There are two ways to instantiate this operator.
 
     In the first way, you can take the JSON payload that you typically use
-    to call the ``api/2.1/jobs/run-now`` endpoint and pass it directly
+    to call the ``api/2.2/jobs/run-now`` endpoint and pass it directly
     to our ``DatabricksRunNowOperator`` through the ``json`` parameter.
     For example ::
 
@@ -730,7 +728,7 @@ class DatabricksRunNowOperator(BaseOperator):
         https://docs.databricks.com/en/workflows/jobs/settings.html#add-parameters-for-all-job-tasks
 
     :param json: A JSON object containing API parameters which will be passed
-        directly to the ``api/2.1/jobs/run-now`` endpoint. The other named parameters
+        directly to the ``api/2.2/jobs/run-now`` endpoint. The other named parameters
        (i.e. ``notebook_params``, ``spark_submit_params``..) to this operator will
        be merged with this json dictionary if they are provided.
        If there are conflicts during the merge, the named parameters will
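As the docstrings above describe for both DatabricksSubmitRunOperator and DatabricksRunNowOperator, named parameters are merged into the json payload and take precedence on conflict. A short illustrative instantiation; the job_id and connection ID are placeholders, not values from this diff:

    from airflow.providers.databricks.operators.databricks import DatabricksRunNowOperator

    # notebook_params is merged into the json payload; on conflict the named
    # parameter wins over the corresponding key in json.
    run_job = DatabricksRunNowOperator(
        task_id="run_existing_job",
        databricks_conn_id="databricks_default",  # placeholder connection
        json={"job_id": 42},  # placeholder job_id
        notebook_params={"run_date": "{{ ds }}"},
    )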
@@ -25,8 +25,7 @@ from functools import cached_property
 from typing import TYPE_CHECKING
 from urllib.parse import urlsplit
 
-from airflow.exceptions import AirflowException
-from airflow.providers.common.compat.sdk import BaseOperator
+from airflow.providers.common.compat.sdk import AirflowException, BaseOperator
 from airflow.providers.databricks.hooks.databricks import DatabricksHook
 
 if TYPE_CHECKING:
@@ -21,14 +21,20 @@ from __future__ import annotations
 
 import csv
 import json
+import os
 from collections.abc import Sequence
 from functools import cached_property
+from tempfile import NamedTemporaryFile
 from typing import TYPE_CHECKING, Any, ClassVar
+from urllib.parse import urlparse
 
 from databricks.sql.utils import ParamEscaper
 
-from airflow.exceptions import AirflowException
-from airflow.providers.common.compat.sdk import BaseOperator
+from airflow.providers.common.compat.sdk import (
+    AirflowException,
+    AirflowOptionalProviderFeatureException,
+    BaseOperator,
+)
 from airflow.providers.common.sql.operators.sql import SQLExecuteQueryOperator
 from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
 
@@ -63,13 +69,27 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
     :param catalog: An optional initial catalog to use. Requires DBR version 9.0+ (templated)
     :param schema: An optional initial schema to use. Requires DBR version 9.0+ (templated)
     :param output_path: optional string specifying the file to which write selected data. (templated)
-    :param output_format: format of output data if ``output_path` is specified.
-        Possible values are ``csv``, ``json``, ``jsonl``. Default is ``csv``.
+        Supports local file paths and GCS URIs (e.g., ``gs://bucket/path/file.parquet``).
+        When using GCS URIs, requires the ``apache-airflow-providers-google`` package.
+    :param output_format: format of output data if ``output_path`` is specified.
+        Possible values are ``csv``, ``json``, ``jsonl``, ``parquet``, ``avro``. Default is ``csv``.
     :param csv_params: parameters that will be passed to the ``csv.DictWriter`` class used to write CSV data.
+    :param gcp_conn_id: The connection ID to use for connecting to Google Cloud when using GCS output path.
+        Default is ``google_cloud_default``.
+    :param gcs_impersonation_chain: Optional service account to impersonate using short-term
+        credentials for GCS upload, or chained list of accounts required to get the access_token
+        of the last account in the list, which will be impersonated in the request. (templated)
     """
 
     template_fields: Sequence[str] = tuple(
-        {"_output_path", "schema", "catalog", "http_headers", "databricks_conn_id"}
+        {
+            "_output_path",
+            "schema",
+            "catalog",
+            "http_headers",
+            "databricks_conn_id",
+            "_gcs_impersonation_chain",
+        }
         | set(SQLExecuteQueryOperator.template_fields)
     )
 
@@ -91,6 +111,8 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
         output_format: str = "csv",
         csv_params: dict[str, Any] | None = None,
         client_parameters: dict[str, Any] | None = None,
+        gcp_conn_id: str = "google_cloud_default",
+        gcs_impersonation_chain: str | Sequence[str] | None = None,
         **kwargs,
     ) -> None:
         super().__init__(conn_id=databricks_conn_id, **kwargs)
@@ -106,6 +128,8 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
         self.http_headers = http_headers
         self.catalog = catalog
         self.schema = schema
+        self._gcp_conn_id = gcp_conn_id
+        self._gcs_impersonation_chain = gcs_impersonation_chain
 
     @cached_property
     def _hook(self) -> DatabricksSqlHook:
@@ -128,41 +152,151 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
     def _should_run_output_processing(self) -> bool:
         return self.do_xcom_push or bool(self._output_path)
 
+    @property
+    def _is_gcs_output(self) -> bool:
+        """Check if the output path is a GCS URI."""
+        return self._output_path.startswith("gs://") if self._output_path else False
+
+    def _parse_gcs_path(self, path: str) -> tuple[str, str]:
+        """Parse a GCS URI into bucket and object name."""
+        parsed = urlparse(path)
+        bucket = parsed.netloc
+        object_name = parsed.path.lstrip("/")
+        return bucket, object_name
+
+    def _upload_to_gcs(self, local_path: str, gcs_path: str) -> None:
+        """Upload a local file to GCS."""
+        try:
+            from airflow.providers.google.cloud.hooks.gcs import GCSHook
+        except ImportError:
+            raise AirflowOptionalProviderFeatureException(
+                "The 'apache-airflow-providers-google' package is required for GCS output. "
+                "Install it with: pip install apache-airflow-providers-google"
+            )
+
+        bucket, object_name = self._parse_gcs_path(gcs_path)
+        hook = GCSHook(
+            gcp_conn_id=self._gcp_conn_id,
+            impersonation_chain=self._gcs_impersonation_chain,
+        )
+        hook.upload(
+            bucket_name=bucket,
+            object_name=object_name,
+            filename=local_path,
+        )
+        self.log.info("Uploaded output to %s", gcs_path)
+
+    def _write_parquet(self, file_path: str, field_names: list[str], rows: list[Any]) -> None:
+        """Write data to a Parquet file."""
+        import pyarrow as pa
+        import pyarrow.parquet as pq
+
+        data: dict[str, list] = {name: [] for name in field_names}
+        for row in rows:
+            row_dict = row._asdict()
+            for name in field_names:
+                data[name].append(row_dict[name])
+
+        table = pa.Table.from_pydict(data)
+        pq.write_table(table, file_path)
+
+    def _write_avro(self, file_path: str, field_names: list[str], rows: list[Any]) -> None:
+        """Write data to an Avro file using fastavro."""
+        try:
+            from fastavro import writer
+        except ImportError:
+            raise AirflowOptionalProviderFeatureException(
+                "The 'fastavro' package is required for Avro output. Install it with: pip install fastavro"
+            )
+
+        data: dict[str, list] = {name: [] for name in field_names}
+        for row in rows:
+            row_dict = row._asdict()
+            for name in field_names:
+                data[name].append(row_dict[name])
+
+        schema_fields = []
+        for name in field_names:
+            sample_val = next(
+                (data[name][i] for i in range(len(data[name])) if data[name][i] is not None), None
+            )
+            if sample_val is None:
+                avro_type = ["null", "string"]
+            elif isinstance(sample_val, bool):
+                avro_type = ["null", "boolean"]
+            elif isinstance(sample_val, int):
+                avro_type = ["null", "long"]
+            elif isinstance(sample_val, float):
+                avro_type = ["null", "double"]
+            else:
+                avro_type = ["null", "string"]
+            schema_fields.append({"name": name, "type": avro_type})
+
+        avro_schema = {
+            "type": "record",
+            "name": "QueryResult",
+            "fields": schema_fields,
+        }
+
+        records = [row._asdict() for row in rows]
+        with open(file_path, "wb") as f:
+            writer(f, avro_schema, records)
+
     def _process_output(self, results: list[Any], descriptions: list[Sequence[Sequence] | None]) -> list[Any]:
         if not self._output_path:
             return list(zip(descriptions, results))
         if not self._output_format:
             raise AirflowException("Output format should be specified!")
-        # Output to a file only the result of last query
+
         last_description = descriptions[-1]
         last_results = results[-1]
         if last_description is None:
-            raise AirflowException("There is missing description present for the output file. .")
+            raise AirflowException("There is missing description present for the output file.")
         field_names = [field[0] for field in last_description]
-        if self._output_format.lower() == "csv":
-            with open(self._output_path, "w", newline="") as file:
-                if self._csv_params:
-                    csv_params = self._csv_params
-                else:
-                    csv_params = {}
-                write_header = csv_params.get("header", True)
-                if "header" in csv_params:
-                    del csv_params["header"]
-                writer = csv.DictWriter(file, fieldnames=field_names, **csv_params)
-                if write_header:
-                    writer.writeheader()
-                for row in last_results:
-                    writer.writerow(row._asdict())
-        elif self._output_format.lower() == "json":
-            with open(self._output_path, "w") as file:
-                file.write(json.dumps([row._asdict() for row in last_results]))
-        elif self._output_format.lower() == "jsonl":
-            with open(self._output_path, "w") as file:
-                for row in last_results:
-                    file.write(json.dumps(row._asdict()))
-                    file.write("\n")
+
+        if self._is_gcs_output:
+            suffix = f".{self._output_format.lower()}"
+            tmp_file = NamedTemporaryFile(mode="w", suffix=suffix, delete=False, newline="")
+            local_path = tmp_file.name
+            tmp_file.close()
         else:
-            raise AirflowException(f"Unsupported output format: '{self._output_format}'")
+            local_path = self._output_path
+
+        try:
+            output_format = self._output_format.lower()
+            if output_format == "csv":
+                with open(local_path, "w", newline="") as file:
+                    if self._csv_params:
+                        csv_params = self._csv_params.copy()
+                    else:
+                        csv_params = {}
+                    write_header = csv_params.pop("header", True)
+                    writer = csv.DictWriter(file, fieldnames=field_names, **csv_params)
+                    if write_header:
+                        writer.writeheader()
+                    for row in last_results:
+                        writer.writerow(row._asdict())
+            elif output_format == "json":
+                with open(local_path, "w") as file:
+                    file.write(json.dumps([row._asdict() for row in last_results]))
+            elif output_format == "jsonl":
+                with open(local_path, "w") as file:
+                    for row in last_results:
+                        file.write(json.dumps(row._asdict()))
+                        file.write("\n")
+            elif output_format == "parquet":
+                self._write_parquet(local_path, field_names, last_results)
+            elif output_format == "avro":
+                self._write_avro(local_path, field_names, last_results)
+            else:
+                raise ValueError(f"Unsupported output format: '{self._output_format}'")
+
+            if self._is_gcs_output:
+                self._upload_to_gcs(local_path, self._output_path)
+        finally:
+            if self._is_gcs_output and os.path.exists(local_path):
+                os.unlink(local_path)
+
         return list(zip(descriptions, results))
 
 
@@ -25,8 +25,7 @@ from typing import TYPE_CHECKING, Any
 
 from mergedeep import merge
 
-from airflow.exceptions import AirflowException
-from airflow.providers.common.compat.sdk import BaseOperator, TaskGroup
+from airflow.providers.common.compat.sdk import AirflowException, BaseOperator, TaskGroup
 from airflow.providers.databricks.hooks.databricks import DatabricksHook, RunLifeCycleState
 from airflow.providers.databricks.plugins.databricks_workflow import (
     WorkflowJobRepairAllFailedLink,
@@ -20,11 +20,17 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Any
 from urllib.parse import unquote
 
-from airflow.exceptions import AirflowException, TaskInstanceNotFound
+from airflow.exceptions import TaskInstanceNotFound
 from airflow.models.dagrun import DagRun
 from airflow.models.taskinstance import TaskInstance, TaskInstanceKey, clear_task_instances
-from airflow.plugins_manager import AirflowPlugin
-from airflow.providers.common.compat.sdk import BaseOperatorLink, TaskGroup, XCom
+from airflow.providers.common.compat.sdk import (
+    AirflowException,
+    AirflowOptionalProviderFeatureException,
+    AirflowPlugin,
+    BaseOperatorLink,
+    TaskGroup,
+    XCom,
+)
 from airflow.providers.databricks.hooks.databricks import DatabricksHook
 from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS
 from airflow.utils.log.logging_mixin import LoggingMixin
@@ -68,6 +74,10 @@ if not AIRFLOW_V_3_0_PLUS:
     from flask_appbuilder import BaseView
     from flask_appbuilder.api import expose
 
+    try:
+        from sqlalchemy import select
+    except ImportError:
+        select = None  # type: ignore[assignment,misc]
     from airflow.utils.session import NEW_SESSION, provide_session
     from airflow.www import auth
 
@@ -140,10 +150,17 @@ if not AIRFLOW_V_3_0_PLUS:
         :param session: The SQLAlchemy session to use for the query. If None, uses the default session.
         :return: The DagRun object associated with the specified DAG and run_id.
         """
+        if select is None:
+            raise AirflowOptionalProviderFeatureException(
+                "sqlalchemy is required for workflow repair functionality. "
+                "Install it with: pip install 'apache-airflow-providers-databricks[sqlalchemy]'"
+            )
         if not session:
             raise AirflowException("Session not provided.")
 
-        return session.query(DagRun).filter(DagRun.dag_id == dag.dag_id, DagRun.run_id == run_id).one()
+        return session.scalars(
+            select(DagRun).where(DagRun.dag_id == dag.dag_id, DagRun.run_id == run_id)
+        ).one()
 
     @provide_session
     def _clear_task_instances(
@@ -157,20 +174,23 @@ if not AIRFLOW_V_3_0_PLUS:
 
     @provide_session
     def get_task_instance(operator: BaseOperator, dttm, session: Session = NEW_SESSION) -> TaskInstance:
+        if select is None:
+            raise AirflowOptionalProviderFeatureException(
+                "sqlalchemy is required to get task instance. "
+                "Install it with: pip install 'apache-airflow-providers-databricks[sqlalchemy]'"
+            )
         dag_id = operator.dag.dag_id
         if hasattr(DagRun, "execution_date"):  # Airflow 2.x.
             dag_run = DagRun.find(dag_id, execution_date=dttm)[0]  # type: ignore[call-arg]
         else:
             dag_run = DagRun.find(dag_id, logical_date=dttm)[0]
-        ti = (
-            session.query(TaskInstance)
-            .filter(
+        ti = session.scalars(
+            select(TaskInstance).where(
                 TaskInstance.dag_id == dag_id,
                 TaskInstance.run_id == dag_run.run_id,
                 TaskInstance.task_id == operator.task_id,
             )
-            .one_or_none()
-        )
+        ).one_or_none()
         if not ti:
             raise TaskInstanceNotFound("Task instance not found")
         return ti
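The rewrites above swap the legacy Query API for 2.0-style select() statements executed through session.scalars(), with the select import guarded so the plugin degrades cleanly when sqlalchemy is absent. The query pattern in isolation, as a small sketch (the session argument is whatever Airflow's provide_session supplies):

    from sqlalchemy import select

    from airflow.models.dagrun import DagRun

    def find_dag_run(session, dag_id: str, run_id: str) -> DagRun:
        # Equivalent to the legacy session.query(DagRun).filter(...).one(),
        # but written with a 2.0-style select() and session.scalars().
        stmt = select(DagRun).where(DagRun.dag_id == dag_id, DagRun.run_id == run_id)
        return session.scalars(stmt).one()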
@@ -278,7 +298,7 @@ class WorkflowJobRunLink(BaseOperatorLink, LoggingMixin):
         """XCom key where the link is stored during task execution."""
         return "databricks_job_run_link"
 
-    def get_link(
+    def get_link(  # type: ignore[override]  # Signature intentionally kept this way for Airflow 2.x compatibility
         self,
         operator: BaseOperator,
         dttm=None,
@@ -354,7 +374,7 @@ class WorkflowJobRepairAllFailedLink(BaseOperatorLink, LoggingMixin):
 
     name = "Repair All Failed Tasks"
 
-    def get_link(
+    def get_link(  # type: ignore[override]  # Signature intentionally kept this way for Airflow 2.x compatibility
         self,
         operator,
         dttm=None,
@@ -451,7 +471,7 @@ class WorkflowJobRepairSingleTaskLink(BaseOperatorLink, LoggingMixin):
 
     name = "Repair a single task"
 
-    def get_link(
+    def get_link(  # type: ignore[override]  # Signature intentionally kept this way for Airflow 2.x compatibility
         self,
         operator,
         dttm=None,
@@ -22,9 +22,7 @@ from collections.abc import Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any
 
-from airflow.configuration import conf
-from airflow.exceptions import AirflowException
-from airflow.providers.common.compat.sdk import BaseSensorOperator
+from airflow.providers.common.compat.sdk import AirflowException, BaseSensorOperator, conf
 from airflow.providers.databricks.hooks.databricks import DatabricksHook, SQLStatementState
 from airflow.providers.databricks.operators.databricks import DEFER_METHOD_NAME
 from airflow.providers.databricks.utils.mixins import DatabricksSQLStatementsMixin
@@ -27,8 +27,7 @@ from typing import TYPE_CHECKING, Any
 
 from databricks.sql.utils import ParamEscaper
 
-from airflow.exceptions import AirflowException
-from airflow.providers.common.compat.sdk import BaseSensorOperator
+from airflow.providers.common.compat.sdk import AirflowException, BaseSensorOperator
 from airflow.providers.common.sql.hooks.handlers import fetch_all_handler
 from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
 
@@ -24,8 +24,7 @@ from collections.abc import Callable, Iterable, Sequence
 from functools import cached_property
 from typing import TYPE_CHECKING, Any
 
-from airflow.exceptions import AirflowException
-from airflow.providers.common.compat.sdk import BaseSensorOperator
+from airflow.providers.common.compat.sdk import AirflowException, BaseSensorOperator
 from airflow.providers.common.sql.hooks.handlers import fetch_all_handler
 from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
 
@@ -17,7 +17,7 @@
 # under the License.
 from __future__ import annotations
 
-from airflow.exceptions import AirflowException
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.providers.databricks.hooks.databricks import DatabricksHook, RunState
 
 
@@ -20,18 +20,14 @@ from __future__ import annotations
 
 import time
 from logging import Logger
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Protocol,
-)
+from typing import TYPE_CHECKING, Any, Protocol
 
-from airflow.exceptions import AirflowException
+from airflow.providers.common.compat.sdk import AirflowException
 from airflow.providers.databricks.hooks.databricks import DatabricksHook, SQLStatementState
 from airflow.providers.databricks.triggers.databricks import DatabricksSQLStatementExecutionTrigger
 
 if TYPE_CHECKING:
-    from airflow.utils.context import Context
+    from airflow.sdk import Context
 
 
 class GetHookHasFields(Protocol):
@@ -24,7 +24,6 @@ from typing import TYPE_CHECKING, Any
 import requests
 
 from airflow.providers.common.compat.openlineage.check import require_openlineage_version
-from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS
 from airflow.utils import timezone
 
 if TYPE_CHECKING:
@@ -37,60 +36,6 @@ if TYPE_CHECKING:
 log = logging.getLogger(__name__)
 
 
-def _get_logical_date(task_instance):
-    # todo: remove when min airflow version >= 3.0
-    if AIRFLOW_V_3_0_PLUS:
-        dagrun = task_instance.get_template_context()["dag_run"]
-        return dagrun.logical_date or dagrun.run_after
-
-    if hasattr(task_instance, "logical_date"):
-        date = task_instance.logical_date
-    else:
-        date = task_instance.execution_date
-
-    return date
-
-
-def _get_dag_run_clear_number(task_instance):
-    # todo: remove when min airflow version >= 3.0
-    if AIRFLOW_V_3_0_PLUS:
-        dagrun = task_instance.get_template_context()["dag_run"]
-        return dagrun.clear_number
-    return task_instance.dag_run.clear_number
-
-
-# todo: move this run_id logic into OpenLineage's listener to avoid differences
-def _get_ol_run_id(task_instance) -> str:
-    """
-    Get OpenLineage run_id from TaskInstance.
-
-    It's crucial that the task_instance's run_id creation logic matches OpenLineage's listener implementation.
-    Only then can we ensure that the generated run_id aligns with the Airflow task,
-    enabling a proper connection between events.
-    """
-    from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter
-
-    # Generate same OL run id as is generated for current task instance
-    return OpenLineageAdapter.build_task_instance_run_id(
-        dag_id=task_instance.dag_id,
-        task_id=task_instance.task_id,
-        logical_date=_get_logical_date(task_instance),
-        try_number=task_instance.try_number,
-        map_index=task_instance.map_index,
-    )
-
-
-# todo: move this run_id logic into OpenLineage's listener to avoid differences
-def _get_ol_dag_run_id(task_instance) -> str:
-    from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter
-
-    return OpenLineageAdapter.build_dag_run_id(
-        dag_id=task_instance.dag_id,
-        logical_date=_get_logical_date(task_instance),
-        clear_number=_get_dag_run_clear_number(task_instance),
-    )
-
-
 def _get_parent_run_facet(task_instance):
     """
     Retrieve the ParentRunFacet associated with a specific Airflow task instance.
@@ -101,22 +46,39 @@ def _get_parent_run_facet(task_instance):
     """
     from openlineage.client.facet_v2 import parent_run
 
-    from airflow.providers.openlineage.conf import namespace
+    from airflow.providers.openlineage.plugins.macros import (
+        lineage_job_name,
+        lineage_job_namespace,
+        lineage_root_job_name,
+        lineage_root_run_id,
+        lineage_run_id,
+    )
+
+    parent_run_id = lineage_run_id(task_instance)
+    parent_job_name = lineage_job_name(task_instance)
+    parent_job_namespace = lineage_job_namespace()
+
+    root_parent_run_id = lineage_root_run_id(task_instance)
+    rot_parent_job_name = lineage_root_job_name(task_instance)
+
+    try:  # Added in OL provider 2.9.0, try to use it if possible
+        from airflow.providers.openlineage.plugins.macros import lineage_root_job_namespace
 
-    parent_run_id = _get_ol_run_id(task_instance)
-    root_parent_run_id = _get_ol_dag_run_id(task_instance)
+        root_parent_job_namespace = lineage_root_job_namespace(task_instance)
+    except ImportError:
+        root_parent_job_namespace = lineage_job_namespace()
 
     return parent_run.ParentRunFacet(
         run=parent_run.Run(runId=parent_run_id),
         job=parent_run.Job(
-            namespace=namespace(),
-            name=f"{task_instance.dag_id}.{task_instance.task_id}",
+            namespace=parent_job_namespace,
+            name=parent_job_name,
         ),
         root=parent_run.Root(
            run=parent_run.RootRun(runId=root_parent_run_id),
            job=parent_run.RootJob(
-                name=task_instance.dag_id,
-                namespace=namespace(),
+                name=rot_parent_job_name,
+                namespace=root_parent_job_namespace,
            ),
        ),
    )
@@ -209,7 +171,7 @@
     return start, end
 
 
-@require_openlineage_version(provider_min_version="2.3.0")
+@require_openlineage_version(provider_min_version="2.5.0")
 def emit_openlineage_events_for_databricks_queries(
     task_instance,
     hook: DatabricksSqlHook | DatabricksHook | None = None,
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-databricks
-Version: 7.8.0rc1
+Version: 7.9.0
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -22,34 +22,40 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: System :: Monitoring
 License-File: LICENSE
 License-File: NOTICE
-Requires-Dist: apache-airflow>=2.11.0rc1
-Requires-Dist: apache-airflow-providers-common-compat>=1.8.0rc1
-Requires-Dist: apache-airflow-providers-common-sql>=1.27.0rc1
+Requires-Dist: apache-airflow>=2.11.0
+Requires-Dist: apache-airflow-providers-common-compat>=1.13.0
+Requires-Dist: apache-airflow-providers-common-sql>=1.27.0
 Requires-Dist: requests>=2.32.0,<3
 Requires-Dist: databricks-sql-connector>=4.0.0
-Requires-Dist: databricks-sqlalchemy>=1.0.2
 Requires-Dist: aiohttp>=3.9.2, <4
 Requires-Dist: mergedeep>=1.3.4
 Requires-Dist: pandas>=2.1.2; python_version <"3.13"
 Requires-Dist: pandas>=2.2.3; python_version >="3.13"
 Requires-Dist: pyarrow>=16.1.0; python_version < '3.13'
 Requires-Dist: pyarrow>=18.0.0; python_version >= '3.13'
+Requires-Dist: fastavro>=1.9.0 ; extra == "avro"
+Requires-Dist: fastavro>=1.10.0 ; extra == "avro" and (python_version>="3.12")
 Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
-Requires-Dist: apache-airflow-providers-fab>=2.2.0rc1 ; extra == "fab" and ( python_version < '3.13')
-Requires-Dist: apache-airflow-providers-openlineage>=2.3.0rc1 ; extra == "openlineage"
+Requires-Dist: apache-airflow-providers-fab>=2.2.0 ; extra == "fab" and ( python_version < '3.13')
+Requires-Dist: apache-airflow-providers-google>=10.24.0 ; extra == "google"
+Requires-Dist: apache-airflow-providers-openlineage>=2.3.0 ; extra == "openlineage"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
+Requires-Dist: databricks-sqlalchemy>=1.0.2 ; extra == "sqlalchemy"
 Requires-Dist: apache-airflow-providers-standard ; extra == "standard"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.8.0/changelog.html
-Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.8.0
+Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/changelog.html
+Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
+Provides-Extra: avro
 Provides-Extra: azure-identity
 Provides-Extra: fab
+Provides-Extra: google
 Provides-Extra: openlineage
 Provides-Extra: sdk
+Provides-Extra: sqlalchemy
 Provides-Extra: standard
 
 
@@ -77,7 +83,7 @@ Provides-Extra: standard
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``7.8.0``
+Release: ``7.9.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -90,7 +96,7 @@ This is a provider package for ``databricks`` provider. All classes for this provider package
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.8.0/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/>`_.
 
 Installation
 ------------
@@ -108,11 +114,10 @@ Requirements
 PIP package                                  Version required
 ==========================================   ======================================
 ``apache-airflow``                           ``>=2.11.0``
-``apache-airflow-providers-common-compat``   ``>=1.8.0``
+``apache-airflow-providers-common-compat``   ``>=1.13.0``
 ``apache-airflow-providers-common-sql``      ``>=1.27.0``
 ``requests``                                 ``>=2.32.0,<3``
 ``databricks-sql-connector``                 ``>=4.0.0``
-``databricks-sqlalchemy``                    ``>=1.0.2``
 ``aiohttp``                                  ``>=3.9.2,<4``
 ``mergedeep``                                ``>=1.3.4``
 ``pandas``                                   ``>=2.1.2; python_version < "3.13"``
@@ -139,6 +144,7 @@ Dependent package
 ================================================================================================================== =================
 `apache-airflow-providers-common-compat <https://airflow.apache.org/docs/apache-airflow-providers-common-compat>`_ ``common.compat``
 `apache-airflow-providers-common-sql <https://airflow.apache.org/docs/apache-airflow-providers-common-sql>`_       ``common.sql``
+`apache-airflow-providers-google <https://airflow.apache.org/docs/apache-airflow-providers-google>`_               ``google``
 `apache-airflow-providers-openlineage <https://airflow.apache.org/docs/apache-airflow-providers-openlineage>`_     ``openlineage``
 ================================================================================================================== =================
 
@@ -153,8 +159,11 @@ Extra Dependencies
 ``fab``            ``apache-airflow-providers-fab>=2.2.0; python_version < '3.13'``
 ``standard``       ``apache-airflow-providers-standard``
 ``openlineage``    ``apache-airflow-providers-openlineage>=2.3.0``
+``sqlalchemy``     ``databricks-sqlalchemy>=1.0.2``
+``google``         ``apache-airflow-providers-google>=10.24.0``
+``avro``           ``fastavro>=1.9.0``, ``fastavro>=1.10.0;python_version>="3.12"``
 ================== ================================================================
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.8.0/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.9.0/changelog.html>`_.
 
@@ -0,0 +1,31 @@
+airflow/providers/databricks/__init__.py,sha256=nAdKvPEVae_IY8zBScAW_De79Ob4OC-dGDALAno1HA0,1499
+airflow/providers/databricks/exceptions.py,sha256=v7TD8auFp9LmyWqRtnXYG8mOit0WE3OuInUNFoC0zTo,1278
+airflow/providers/databricks/get_provider_info.py,sha256=LfK0AwIARVh4tX5146-J2VRZwfe6GP3xjLyltA7X7iU,5738
+airflow/providers/databricks/version_compat.py,sha256=RQbdCueLOaFZWekpQmF0BoAoJInW8EoyvJ3Ah-HbrPo,1577
+airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
+airflow/providers/databricks/hooks/databricks.py,sha256=eYvrc9H3-gpGZRXBGms_DyjeFjxg-JB1lYKmdr2bwcE,29789
+airflow/providers/databricks/hooks/databricks_base.py,sha256=ud9Mxzi86tAaGunlx0vypLR6ICapdn2qyFlT3WFjZjQ,36881
+airflow/providers/databricks/hooks/databricks_sql.py,sha256=4LSTSYxHPJolmB91eOP_LuShyAUcjWATx6-ywUx8ASc,18149
+airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
+airflow/providers/databricks/operators/databricks.py,sha256=NqcMOAlC_OvkrBFUaRFQa37P36Shja-plECZzg04Gl8,79258
+airflow/providers/databricks/operators/databricks_repos.py,sha256=jOrYO_tFQJ5JBXeu7Rhrc3pcQJ4qtzSGSjGZ4GffmwU,13125
+airflow/providers/databricks/operators/databricks_sql.py,sha256=9hXLFSUtdVlg45lwBTIZgY33is5-Kkgp00Cz22sI-yg,27076
+airflow/providers/databricks/operators/databricks_workflow.py,sha256=xqk6kbFcqArHo4w9E0sVGbAkX2tuBqWdtvwiFyc9jzo,14989
+airflow/providers/databricks/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
+airflow/providers/databricks/plugins/databricks_workflow.py,sha256=Tg4fgrMQ31NqtcjPK6D61ehSqp-Jtf3_OS4db7BDSCo,21019
+airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
+airflow/providers/databricks/sensors/databricks.py,sha256=dtVcb-Ka9R3l8y_59hdz65be3JUIVEsAodTsviwx1Mg,6199
+airflow/providers/databricks/sensors/databricks_partition.py,sha256=AV7GoAIRnV7NEtbqUxp9WdSeN-LeIc49I3_NaI1cBiY,9910
+airflow/providers/databricks/sensors/databricks_sql.py,sha256=ON3ulhD0I4ukJhKzDYTqw-8ZkdUuED_8QyDZbzFgHko,5603
+airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
+airflow/providers/databricks/triggers/databricks.py,sha256=DQbXLw1W_e3Iw-hsDph7vPuHc2caj623V7WmA2_PftM,8672
+airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
+airflow/providers/databricks/utils/databricks.py,sha256=bnZdjQ1etvAcfgdmb8BR4i1M4YjdcDXxxznVtmur1GM,5134
+airflow/providers/databricks/utils/mixins.py,sha256=XDA9v9BeCgMIznYPpa-X7XIqrD1mJbw4eSQUjvTsQXI,7397
+airflow/providers/databricks/utils/openlineage.py,sha256=naqLzbdBebwDUPvDhhIa5Ey_8SgKkYqdwhzJC_51gFU,13674
+apache_airflow_providers_databricks-7.9.0.dist-info/entry_points.txt,sha256=hjmZm3ab2cteTR4t9eE28oKixHwNIKtLCThd6sx3XRQ,227
+apache_airflow_providers_databricks-7.9.0.dist-info/licenses/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
+apache_airflow_providers_databricks-7.9.0.dist-info/licenses/NOTICE,sha256=_cWHznIoUSbLCY_KfmKqetlKlsoH0c2VBjmZjElAzuc,168
+apache_airflow_providers_databricks-7.9.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+apache_airflow_providers_databricks-7.9.0.dist-info/METADATA,sha256=bbY2URbAFkKKJ5xXgaSB6vIhmfS27RefxG4X0DesqOc,8325
+apache_airflow_providers_databricks-7.9.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Apache Airflow
-Copyright 2016-2025 The Apache Software Foundation
+Copyright 2016-2026 The Apache Software Foundation
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
@@ -1,31 +0,0 @@
-airflow/providers/databricks/__init__.py,sha256=jK9hWZ8jptf_y_7T6PywlGiLdB4zq1EsGECyxMGFi8A,1499
-airflow/providers/databricks/exceptions.py,sha256=85RklmLOI_PnTzfXNIUd5fAu2aMMUhelwumQAX0wANE,1261
-airflow/providers/databricks/get_provider_info.py,sha256=LfK0AwIARVh4tX5146-J2VRZwfe6GP3xjLyltA7X7iU,5738
-airflow/providers/databricks/version_compat.py,sha256=RQbdCueLOaFZWekpQmF0BoAoJInW8EoyvJ3Ah-HbrPo,1577
-airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/hooks/databricks.py,sha256=uOBPUUAEc9eHBdvMgNyJzWivIFCt_GQgR4UlRrRxqgM,29754
-airflow/providers/databricks/hooks/databricks_base.py,sha256=m-m2AKqD3-6mEfvuwgo7Era47zGzsjKbpLTRQNjiUS4,36864
-airflow/providers/databricks/hooks/databricks_sql.py,sha256=xougOWuFgQzhBzFcuYkbX-lo0FpKCQztXoBETJEzesg,17755
-airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/operators/databricks.py,sha256=bVhFNTeGboHkmaJkYDYEyF0V1QUOB_RnsvwaCuEtIew,79316
-airflow/providers/databricks/operators/databricks_repos.py,sha256=VRZye45ZMlDxti6ZJjuouox5umiMoeQ-BKugPpE7jnM,13155
-airflow/providers/databricks/operators/databricks_sql.py,sha256=gwpkr660qpk4dUve98RB-hniaMzuXL6znQZZGilJxi0,21842
-airflow/providers/databricks/operators/databricks_workflow.py,sha256=QLsR0pGLWvvQbutsjj4RWwBE-z6tkWiYLHj6waMv8ZE,15019
-airflow/providers/databricks/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/databricks/plugins/databricks_workflow.py,sha256=5vyG2WNM25ptSv5IwAndUTqKAOmTneOWy_pAtqBKcgc,20020
-airflow/providers/databricks/sensors/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/databricks/sensors/databricks.py,sha256=RrjSzncvppdp5U8RYHd975MCIQIb_s1VQoxm9Aqbvac,6262
-airflow/providers/databricks/sensors/databricks_partition.py,sha256=qPDy8oxg-Lo-jnHy1EbxmA5GIjC6t0XnFJ1E3aAmUgg,9940
-airflow/providers/databricks/sensors/databricks_sql.py,sha256=shq7ng4LCiaD4Q7lorm4g1A7aijmq3nVUnCFlYtoI7c,5633
-airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
-airflow/providers/databricks/triggers/databricks.py,sha256=DQbXLw1W_e3Iw-hsDph7vPuHc2caj623V7WmA2_PftM,8672
-airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
-airflow/providers/databricks/utils/databricks.py,sha256=ecvzZbC4KdXds47VeSayot9EO-RQnTRJTEwKITH7waQ,5117
-airflow/providers/databricks/utils/mixins.py,sha256=WUmkt3AmXalmV6zOUIJZWbTldxYunAZOstddDhKCC94,7407
-airflow/providers/databricks/utils/openlineage.py,sha256=1jT5Woh9YifawdP-VFWsabfF-ecuCjPlzD5P_W4DAhI,15078
-apache_airflow_providers_databricks-7.8.0rc1.dist-info/entry_points.txt,sha256=hjmZm3ab2cteTR4t9eE28oKixHwNIKtLCThd6sx3XRQ,227
-apache_airflow_providers_databricks-7.8.0rc1.dist-info/licenses/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
-apache_airflow_providers_databricks-7.8.0rc1.dist-info/licenses/NOTICE,sha256=E3-_E02gwwSEFzeeWPKmnIjOoos3hW28CLISV6sYrbQ,168
-apache_airflow_providers_databricks-7.8.0rc1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
-apache_airflow_providers_databricks-7.8.0rc1.dist-info/METADATA,sha256=Dts1HmORUW1KUFBXvtAotQuZvHRDbj0iMadzjbWFyNg,7782
-apache_airflow_providers_databricks-7.8.0rc1.dist-info/RECORD,,