apache-airflow-providers-databricks 7.3.2rc1__py3-none-any.whl → 7.4.0rc1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.


airflow/providers/databricks/__init__.py
@@ -29,11 +29,11 @@ from airflow import __version__ as airflow_version
 
 __all__ = ["__version__"]
 
-__version__ = "7.3.2"
+__version__ = "7.4.0"
 
 if packaging.version.parse(packaging.version.parse(airflow_version).base_version) < packaging.version.parse(
-    "2.9.0"
+    "2.10.0"
 ):
     raise RuntimeError(
-        f"The package `apache-airflow-providers-databricks:{__version__}` needs Apache Airflow 2.9.0+"
+        f"The package `apache-airflow-providers-databricks:{__version__}` needs Apache Airflow 2.10.0+"
     )

airflow/providers/databricks/hooks/databricks_sql.py
@@ -35,7 +35,8 @@ from databricks import sql  # type: ignore[attr-defined]
 from databricks.sql.types import Row
 
 from airflow.exceptions import AirflowException
-from airflow.providers.common.sql.hooks.sql import DbApiHook, return_single_query_results
+from airflow.providers.common.sql.hooks.handlers import return_single_query_results
+from airflow.providers.common.sql.hooks.sql import DbApiHook
 from airflow.providers.databricks.exceptions import DatabricksSqlExecutionError, DatabricksSqlExecutionTimeout
 from airflow.providers.databricks.hooks.databricks_base import BaseDatabricksHook
 
@@ -43,6 +44,8 @@ if TYPE_CHECKING:
     from databricks.sql.client import Connection
 
     from airflow.models.connection import Connection as AirflowConnection
+    from airflow.providers.openlineage.extractors import OperatorLineage
+    from airflow.providers.openlineage.sqlparser import DatabaseInfo
 
 
 LIST_SQL_ENDPOINTS_ENDPOINT = ("GET", "api/2.0/sql/endpoints")
@@ -107,6 +110,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         self.catalog = catalog
         self.schema = schema
         self.additional_params = kwargs
+        self.query_ids: list[str] = []
 
     def _get_extra_config(self) -> dict[str, Any | None]:
         extra_params = copy(self.databricks_conn.extra_dejson)
@@ -226,6 +230,8 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
             it will raise and fail.
         """
         self.descriptions = []
+        self.query_ids = []
+
         if isinstance(sql, str):
             if split_statements:
                 sql_list = [self.strip_sql_string(s) for s in self.split_sql_string(sql)]
@@ -242,6 +248,7 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
         conn = None
         results = []
         for sql_statement in sql_list:
+            self.log.info("Running statement: %s, parameters: %s", sql_statement, parameters)
             # when using AAD tokens, it could expire if previous query run longer than token lifetime
             conn = self.get_conn()
             with closing(conn.cursor()) as cur:
@@ -265,6 +272,10 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
                     if t is not None:
                         t.cancel()
 
+                if query_id := cur.query_id:
+                    self.log.info("Databricks query id: %s", query_id)
+                    self.query_ids.append(query_id)
+
                 if handler is not None:
                     raw_result = handler(cur)
                     result = self._make_common_data_structure(raw_result)
@@ -307,3 +318,80 @@ class DatabricksSqlHook(BaseDatabricksHook, DbApiHook):
 
     def bulk_load(self, table, tmp_file):
         raise NotImplementedError()
+
+    def get_openlineage_database_info(self, connection) -> DatabaseInfo:
+        from airflow.providers.openlineage.sqlparser import DatabaseInfo
+
+        return DatabaseInfo(
+            scheme=self.get_openlineage_database_dialect(connection),
+            authority=self.host,
+            database=self.catalog,
+            information_schema_columns=[
+                "table_schema",
+                "table_name",
+                "column_name",
+                "ordinal_position",
+                "data_type",
+                "table_catalog",
+            ],
+            is_information_schema_cross_db=True,
+        )
+
+    def get_openlineage_database_dialect(self, _) -> str:
+        return "databricks"
+
+    def get_openlineage_default_schema(self) -> str | None:
+        return self.schema or "default"
+
+    def get_openlineage_database_specific_lineage(self, task_instance) -> OperatorLineage | None:
+        """
+        Generate OpenLineage metadata for a Databricks task instance based on executed query IDs.
+
+        If a single query ID is present, an `ExternalQueryRunFacet` is attached to the lineage metadata.
+        If multiple query IDs are present, separate OpenLineage events are emitted for each query instead.
+
+        Note that `get_openlineage_database_specific_lineage` is usually called after the task's execution,
+        so if multiple query IDs are present, both START and COMPLETE events for each query will be emitted
+        after the task's execution. If we are able to query Databricks for query execution metadata,
+        query event times will correspond to the actual query start and finish times.
+
+        Args:
+            task_instance: The Airflow TaskInstance object for which lineage is being collected.
+
+        Returns:
+            An `OperatorLineage` object if a single query ID is found; otherwise `None`.
+        """
+        from airflow.providers.common.compat.openlineage.facet import ExternalQueryRunFacet
+        from airflow.providers.databricks.utils.openlineage import (
+            emit_openlineage_events_for_databricks_queries,
+        )
+        from airflow.providers.openlineage.extractors import OperatorLineage
+        from airflow.providers.openlineage.sqlparser import SQLParser
+
+        if not self.query_ids:
+            self.log.debug("openlineage: no databricks query ids found.")
+            return None
+
+        self.log.debug("openlineage: getting connection to get database info")
+        connection = self.get_connection(self.get_conn_id())
+        namespace = SQLParser.create_namespace(self.get_openlineage_database_info(connection))
+
+        if len(self.query_ids) == 1:
+            self.log.debug("Attaching ExternalQueryRunFacet with single query_id to OpenLineage event.")
+            return OperatorLineage(
+                run_facets={
+                    "externalQuery": ExternalQueryRunFacet(
+                        externalQueryId=self.query_ids[0], source=namespace
+                    )
+                }
+            )
+
+        self.log.info("Multiple query_ids found. Separate OpenLineage event will be emitted for each query.")
+        emit_openlineage_events_for_databricks_queries(
+            query_ids=self.query_ids,
+            query_source_namespace=namespace,
+            task_instance=task_instance,
+            hook=self,
+        )
+
+        return None
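The hook-level lineage above hinges on `query_ids`, which `run()` now fills with the Databricks query id of every executed statement. A minimal sketch of how that surfaces, assuming a placeholder connection id and SQL warehouse `http_path` that are not part of this diff; inside Airflow, `task_instance` would come from the running task:

from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook

# Placeholder connection id and warehouse path: adjust to your environment.
hook = DatabricksSqlHook(
    databricks_conn_id="databricks_default",
    http_path="/sql/1.0/warehouses/abc123",
)

# run() records the Databricks query id of each executed statement.
hook.run("SELECT 1", handler=lambda cur: cur.fetchall())
print(hook.query_ids)  # one id per statement

# With exactly one recorded id, this returns OperatorLineage carrying an
# ExternalQueryRunFacet; with several, it emits one START/COMPLETE pair per
# query via the new utils.openlineage helper and returns None.
# lineage = hook.get_openlineage_database_specific_lineage(task_instance)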

airflow/providers/databricks/operators/databricks_sql.py
@@ -22,6 +22,7 @@ from __future__ import annotations
 import csv
 import json
 from collections.abc import Sequence
+from functools import cached_property
 from typing import TYPE_CHECKING, Any, ClassVar
 
 from databricks.sql.utils import ParamEscaper
@@ -106,7 +107,8 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
         self.catalog = catalog
         self.schema = schema
 
-    def get_db_hook(self) -> DatabricksSqlHook:
+    @cached_property
+    def _hook(self) -> DatabricksSqlHook:
         hook_params = {
             "http_path": self.http_path,
             "session_configuration": self.session_configuration,
@@ -120,6 +122,9 @@ class DatabricksSqlOperator(SQLExecuteQueryOperator):
         }
         return DatabricksSqlHook(self.databricks_conn_id, **hook_params)
 
+    def get_db_hook(self) -> DatabricksSqlHook:
+        return self._hook
+
     def _should_run_output_processing(self) -> bool:
         return self.do_xcom_push or bool(self._output_path)
 
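Turning `get_db_hook()` into a thin wrapper over a cached `_hook` property means every call returns the same hook instance, so state collected while the queries run (notably `query_ids`) is still available when lineage is gathered after execution. A toy sketch of that memoization, using illustrative stand-in classes rather than the provider's real ones:

from functools import cached_property


class FakeHook:
    def __init__(self):
        self.query_ids = []


class FakeOperator:
    @cached_property
    def _hook(self) -> FakeHook:
        return FakeHook()  # constructed once, then reused

    def get_db_hook(self) -> FakeHook:
        return self._hook


op = FakeOperator()
op.get_db_hook().query_ids.append("01ef-1234")  # recorded during "execution"
assert op.get_db_hook() is op._hook  # every call returns the same instance
assert op.get_db_hook().query_ids == ["01ef-1234"]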

airflow/providers/databricks/utils/openlineage.py
@@ -0,0 +1,336 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import annotations
+
+import datetime
+import json
+import logging
+from typing import TYPE_CHECKING, Any
+
+import requests
+
+from airflow.providers.common.compat.openlineage.check import require_openlineage_version
+from airflow.providers.databricks.version_compat import AIRFLOW_V_3_0_PLUS
+from airflow.utils import timezone
+
+if TYPE_CHECKING:
+    from openlineage.client.event_v2 import RunEvent
+    from openlineage.client.facet_v2 import JobFacet
+
+    from airflow.providers.databricks.hooks.databricks_sql import DatabricksSqlHook
+
+log = logging.getLogger(__name__)
+
+
+def _get_logical_date(task_instance):
+    # todo: remove when min airflow version >= 3.0
+    if AIRFLOW_V_3_0_PLUS:
+        dagrun = task_instance.get_template_context()["dag_run"]
+        return dagrun.logical_date or dagrun.run_after
+
+    if hasattr(task_instance, "logical_date"):
+        date = task_instance.logical_date
+    else:
+        date = task_instance.execution_date
+
+    return date
+
+
+def _get_dag_run_clear_number(task_instance):
+    # todo: remove when min airflow version >= 3.0
+    if AIRFLOW_V_3_0_PLUS:
+        dagrun = task_instance.get_template_context()["dag_run"]
+        return dagrun.clear_number
+    return task_instance.dag_run.clear_number
+
+
+# todo: move this run_id logic into OpenLineage's listener to avoid differences
+def _get_ol_run_id(task_instance) -> str:
+    """
+    Get OpenLineage run_id from TaskInstance.
+
+    It's crucial that the task_instance's run_id creation logic matches OpenLineage's listener implementation.
+    Only then can we ensure that the generated run_id aligns with the Airflow task,
+    enabling a proper connection between events.
+    """
+    from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter
+
+    # Generate same OL run id as is generated for current task instance
+    return OpenLineageAdapter.build_task_instance_run_id(
+        dag_id=task_instance.dag_id,
+        task_id=task_instance.task_id,
+        logical_date=_get_logical_date(task_instance),
+        try_number=task_instance.try_number,
+        map_index=task_instance.map_index,
+    )
+
+
+# todo: move this run_id logic into OpenLineage's listener to avoid differences
+def _get_ol_dag_run_id(task_instance) -> str:
+    from airflow.providers.openlineage.plugins.adapter import OpenLineageAdapter
+
+    return OpenLineageAdapter.build_dag_run_id(
+        dag_id=task_instance.dag_id,
+        logical_date=_get_logical_date(task_instance),
+        clear_number=_get_dag_run_clear_number(task_instance),
+    )
+
+
+def _get_parent_run_facet(task_instance):
+    """
+    Retrieve the ParentRunFacet associated with a specific Airflow task instance.
+
+    This facet helps link OpenLineage events of child jobs - such as queries executed within
+    external systems (e.g., Databricks) by the Airflow task - to the original Airflow task execution.
+    Establishing this connection enables better lineage tracking and observability.
+    """
+    from openlineage.client.facet_v2 import parent_run
+
+    from airflow.providers.openlineage.conf import namespace
+
+    parent_run_id = _get_ol_run_id(task_instance)
+    root_parent_run_id = _get_ol_dag_run_id(task_instance)
+
+    return parent_run.ParentRunFacet(
+        run=parent_run.Run(runId=parent_run_id),
+        job=parent_run.Job(
+            namespace=namespace(),
+            name=f"{task_instance.dag_id}.{task_instance.task_id}",
+        ),
+        root=parent_run.Root(
+            run=parent_run.RootRun(runId=root_parent_run_id),
+            job=parent_run.RootJob(
+                name=task_instance.dag_id,
+                namespace=namespace(),
+            ),
+        ),
+    )
+
+
+def _run_api_call(hook: DatabricksSqlHook, query_ids: list[str]) -> list[dict]:
+    """Retrieve execution details for specific queries from Databricks's query history API."""
+    if not hook._token:
+        # This has logic for token initialization
+        hook.get_conn()
+
+    # https://docs.databricks.com/api/azure/workspace/queryhistory/list
+    try:
+        response = requests.get(
+            url=f"https://{hook.host}/api/2.0/sql/history/queries",
+            headers={"Authorization": f"Bearer {hook._token}"},
+            data=json.dumps({"filter_by": {"statement_ids": query_ids}}),
+            timeout=2,
+        )
+    except Exception as e:
+        log.warning(
+            "OpenLineage could not retrieve Databricks queries details. Error received: `%s`.",
+            e,
+        )
+        return []
+
+    if response.status_code != 200:
+        log.warning(
+            "OpenLineage could not retrieve Databricks queries details. API error received: `%s`: `%s`",
+            response.status_code,
+            response.text,
+        )
+        return []
+
+    return response.json()["res"]
+
+
+def _get_queries_details_from_databricks(
+    hook: DatabricksSqlHook, query_ids: list[str]
+) -> dict[str, dict[str, Any]]:
+    if not query_ids:
+        return {}
+
+    queries_info_from_api = _run_api_call(hook=hook, query_ids=query_ids)
+
+    query_details = {}
+    for query_info in queries_info_from_api:
+        if not query_info.get("query_id"):
+            log.debug("Databricks query ID not found in API response.")
+            continue
+
+        q_start_time = None
+        q_end_time = None
+        if query_info.get("query_start_time_ms") and query_info.get("query_end_time_ms"):
+            q_start_time = datetime.datetime.fromtimestamp(
+                query_info["query_start_time_ms"] / 1000, tz=datetime.timezone.utc
+            )
+            q_end_time = datetime.datetime.fromtimestamp(
+                query_info["query_end_time_ms"] / 1000, tz=datetime.timezone.utc
+            )
+
+        query_details[query_info["query_id"]] = {
+            "status": query_info.get("status"),
+            "start_time": q_start_time,
+            "end_time": q_end_time,
+            "query_text": query_info.get("query_text"),
+            "error_message": query_info.get("error_message"),
+        }
+
+    return query_details
+
+
+def _create_ol_event_pair(
+    job_namespace: str,
+    job_name: str,
+    start_time: datetime.datetime,
+    end_time: datetime.datetime,
+    is_successful: bool,
+    run_facets: dict | None = None,
+    job_facets: dict | None = None,
+) -> tuple[RunEvent, RunEvent]:
+    """Create a pair of OpenLineage RunEvents representing the start and end of a query execution."""
+    from openlineage.client.event_v2 import Job, Run, RunEvent, RunState
+    from openlineage.client.uuid import generate_new_uuid
+
+    run = Run(runId=str(generate_new_uuid()), facets=run_facets or {})
+    job = Job(namespace=job_namespace, name=job_name, facets=job_facets or {})
+
+    start = RunEvent(
+        eventType=RunState.START,
+        eventTime=start_time.isoformat(),
+        run=run,
+        job=job,
+    )
+    end = RunEvent(
+        eventType=RunState.COMPLETE if is_successful else RunState.FAIL,
+        eventTime=end_time.isoformat(),
+        run=run,
+        job=job,
+    )
+    return start, end
+
+
+@require_openlineage_version(provider_min_version="2.3.0")
+def emit_openlineage_events_for_databricks_queries(
+    query_ids: list[str],
+    query_source_namespace: str,
+    task_instance,
+    hook: DatabricksSqlHook | None = None,
+    additional_run_facets: dict | None = None,
+    additional_job_facets: dict | None = None,
+) -> None:
+    """
+    Emit OpenLineage events for executed Databricks queries.
+
+    Metadata retrieval from Databricks is attempted only if a `DatabricksSqlHook` is provided.
+    If metadata is available, execution details such as start time, end time, execution status,
+    error messages, and SQL text are included in the events. If no metadata is found, the function
+    defaults to using the Airflow task instance's state and the current timestamp.
+
+    Note that both START and COMPLETE events for each query will be emitted at the same time.
+    If we are able to query Databricks for query execution metadata, event times
+    will correspond to actual query execution times.
+
+    Args:
+        query_ids: A list of Databricks query IDs to emit events for.
+        query_source_namespace: The namespace to be included in ExternalQueryRunFacet.
+        task_instance: The Airflow task instance that ran these queries.
+        hook: A hook instance used to retrieve query metadata if available.
+        additional_run_facets: Additional run facets to include in OpenLineage events.
+        additional_job_facets: Additional job facets to include in OpenLineage events.
+    """
+    from openlineage.client.facet_v2 import job_type_job
+
+    from airflow.providers.common.compat.openlineage.facet import (
+        ErrorMessageRunFacet,
+        ExternalQueryRunFacet,
+        RunFacet,
+        SQLJobFacet,
+    )
+    from airflow.providers.openlineage.conf import namespace
+    from airflow.providers.openlineage.plugins.listener import get_openlineage_listener
+
+    if not query_ids:
+        log.debug("No Databricks query IDs provided; skipping OpenLineage event emission.")
+        return
+
+    query_ids = [q for q in query_ids]  # Make a copy to make sure it does not change
+
+    if hook:
+        log.debug("Retrieving metadata for %s queries from Databricks.", len(query_ids))
+        databricks_metadata = _get_queries_details_from_databricks(hook, query_ids)
+    else:
+        log.debug("DatabricksSqlHook not provided. No extra metadata will be fetched from Databricks.")
+        databricks_metadata = {}
+
+    # If real metadata is unavailable, we send events with eventTime=now
+    default_event_time = timezone.utcnow()
+    # If no query metadata is provided, we use task_instance's state when checking for success
+    # Adjust state for DBX logic, where "finished" means "success"
+    default_state = task_instance.state.value if hasattr(task_instance, "state") else ""
+    default_state = "finished" if default_state == "success" else default_state
+
+    log.debug("Generating OpenLineage facets")
+    common_run_facets = {"parent": _get_parent_run_facet(task_instance)}
+    common_job_facets: dict[str, JobFacet] = {
+        "jobType": job_type_job.JobTypeJobFacet(
+            jobType="QUERY",
+            integration="DATABRICKS",
+            processingType="BATCH",
+        )
+    }
+    additional_run_facets = additional_run_facets or {}
+    additional_job_facets = additional_job_facets or {}
+
+    events: list[RunEvent] = []
+    for counter, query_id in enumerate(query_ids, 1):
+        query_metadata = databricks_metadata.get(query_id, {})
+        log.debug(
+            "Metadata for query no. %s, (ID `%s`): `%s`",
+            counter,
+            query_id,
+            query_metadata if query_metadata else "not found",
+        )
+
+        query_specific_run_facets: dict[str, RunFacet] = {
+            "externalQuery": ExternalQueryRunFacet(externalQueryId=query_id, source=query_source_namespace)
+        }
+        if query_metadata.get("error_message"):
+            query_specific_run_facets["error"] = ErrorMessageRunFacet(
+                message=query_metadata["error_message"],
+                programmingLanguage="SQL",
+            )
+
+        query_specific_job_facets = {}
+        if query_metadata.get("query_text"):
+            query_specific_job_facets["sql"] = SQLJobFacet(query=query_metadata["query_text"])
+
+        log.debug("Creating OpenLineage event pair for query ID: %s", query_id)
+        event_batch = _create_ol_event_pair(
+            job_namespace=namespace(),
+            job_name=f"{task_instance.dag_id}.{task_instance.task_id}.query.{counter}",
+            start_time=query_metadata.get("start_time", default_event_time),  # type: ignore[arg-type]
+            end_time=query_metadata.get("end_time", default_event_time),  # type: ignore[arg-type]
+            # Only finished status means it completed without failures
+            is_successful=query_metadata.get("status", default_state).lower() == "finished",
+            run_facets={**query_specific_run_facets, **common_run_facets, **additional_run_facets},
+            job_facets={**query_specific_job_facets, **common_job_facets, **additional_job_facets},
+        )
+        events.extend(event_batch)
+
+    log.debug("Generated %s OpenLineage events; emitting now.", len(events))
+    adapter = get_openlineage_listener().adapter
+    for event in events:
+        adapter.emit(event)
+
+    log.info("OpenLineage has successfully finished processing information about Databricks queries.")
+    return
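A hedged sketch of calling the new public helper from custom post-execution code; the namespace string is an assumption, and in a real deployment `task_instance` and `hook` come from the executing task (for example, the hook that just ran the statements):

from airflow.providers.databricks.utils.openlineage import (
    emit_openlineage_events_for_databricks_queries,
)


def report_query_lineage(task_instance, hook):
    # hook.query_ids is what DatabricksSqlHook.run() collected during execution.
    emit_openlineage_events_for_databricks_queries(
        query_ids=hook.query_ids,
        query_source_namespace="databricks://my-workspace.cloud.databricks.com",  # assumed
        task_instance=task_instance,
        hook=hook,  # optional; enables real start/end times and statuses from the query history API
    )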

airflow/providers/databricks/version_compat.py
@@ -32,5 +32,4 @@ def get_base_airflow_version_tuple() -> tuple[int, int, int]:
     return airflow_version.major, airflow_version.minor, airflow_version.micro
 
 
-AIRFLOW_V_2_10_PLUS = get_base_airflow_version_tuple() >= (2, 10, 0)
 AIRFLOW_V_3_0_PLUS = get_base_airflow_version_tuple() >= (3, 0, 0)

apache_airflow_providers_databricks-7.3.2rc1.dist-info/METADATA → apache_airflow_providers_databricks-7.4.0rc1.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apache-airflow-providers-databricks
-Version: 7.3.2rc1
+Version: 7.4.0rc1
 Summary: Provider package apache-airflow-providers-databricks for Apache Airflow
 Keywords: airflow-provider,databricks,airflow,integration
 Author-email: Apache Software Foundation <dev@airflow.apache.org>
@@ -20,27 +20,31 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: System :: Monitoring
-Requires-Dist: apache-airflow>=2.9.0rc0
-Requires-Dist: apache-airflow-providers-common-sql>=1.20.0rc0
+Requires-Dist: apache-airflow>=2.10.0rc1
+Requires-Dist: apache-airflow-providers-common-compat>=1.6.0rc1
+Requires-Dist: apache-airflow-providers-common-sql>=1.27.0rc1
 Requires-Dist: requests>=2.31.0,<3
 Requires-Dist: databricks-sql-connector>=3.0.0
+Requires-Dist: databricks-sqlalchemy>=1.0.2
 Requires-Dist: aiohttp>=3.9.2, <4
 Requires-Dist: mergedeep>=1.3.4
 Requires-Dist: pandas>=2.1.2,<2.2
 Requires-Dist: pyarrow>=14.0.1
 Requires-Dist: azure-identity>=1.3.1 ; extra == "azure-identity"
 Requires-Dist: apache-airflow-providers-fab ; extra == "fab"
+Requires-Dist: apache-airflow-providers-openlineage>=2.3.0rc1 ; extra == "openlineage"
 Requires-Dist: databricks-sdk==0.10.0 ; extra == "sdk"
 Requires-Dist: apache-airflow-providers-standard ; extra == "standard"
 Project-URL: Bug Tracker, https://github.com/apache/airflow/issues
-Project-URL: Changelog, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.3.2/changelog.html
-Project-URL: Documentation, https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.3.2
+Project-URL: Changelog, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.4.0/changelog.html
+Project-URL: Documentation, https://airflow.staged.apache.org/docs/apache-airflow-providers-databricks/7.4.0
 Project-URL: Mastodon, https://fosstodon.org/@airflow
 Project-URL: Slack Chat, https://s.apache.org/airflow-slack
 Project-URL: Source Code, https://github.com/apache/airflow
 Project-URL: YouTube, https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/
 Provides-Extra: azure-identity
 Provides-Extra: fab
+Provides-Extra: openlineage
 Provides-Extra: sdk
 Provides-Extra: standard
 
@@ -69,7 +73,7 @@ Provides-Extra: standard
 
 Package ``apache-airflow-providers-databricks``
 
-Release: ``7.3.2``
+Release: ``7.4.0``
 
 
 `Databricks <https://databricks.com/>`__
@@ -82,7 +86,7 @@ This is a provider package for ``databricks`` provider. All classes for this pro
 are in ``airflow.providers.databricks`` python package.
 
 You can find package information and changelog for the provider
-in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.3.2/>`_.
+in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.4.0/>`_.
 
 Installation
 ------------
@@ -96,18 +100,20 @@ The package supports the following python versions: 3.9,3.10,3.11,3.12
 Requirements
 ------------
 
-======================================= ==================
-PIP package                             Version required
-======================================= ==================
-``apache-airflow``                      ``>=2.9.0``
-``apache-airflow-providers-common-sql`` ``>=1.20.0``
-``requests``                            ``>=2.31.0,<3``
-``databricks-sql-connector``            ``>=3.0.0``
-``aiohttp``                             ``>=3.9.2,<4``
-``mergedeep``                           ``>=1.3.4``
-``pandas``                              ``>=2.1.2,<2.2``
-``pyarrow``                             ``>=14.0.1``
-======================================= ==================
+========================================== ==================
+PIP package                                Version required
+========================================== ==================
+``apache-airflow``                         ``>=2.10.0``
+``apache-airflow-providers-common-compat`` ``>=1.6.0``
+``apache-airflow-providers-common-sql``    ``>=1.27.0``
+``requests``                               ``>=2.31.0,<3``
+``databricks-sql-connector``               ``>=3.0.0``
+``databricks-sqlalchemy``                  ``>=1.0.2``
+``aiohttp``                                ``>=3.9.2,<4``
+``mergedeep``                              ``>=1.3.4``
+``pandas``                                 ``>=2.1.2,<2.2``
+``pyarrow``                                ``>=14.0.1``
+========================================== ==================
 
 Cross provider package dependencies
 -----------------------------------
@@ -130,5 +136,5 @@ Dependent package
 ============================================================================================================ ==============
 
 The changelog for the provider package can be found in the
-`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.3.2/changelog.html>`_.
+`changelog <https://airflow.apache.org/docs/apache-airflow-providers-databricks/7.4.0/changelog.html>`_.
 

apache_airflow_providers_databricks-7.3.2rc1.dist-info/RECORD → apache_airflow_providers_databricks-7.4.0rc1.dist-info/RECORD
@@ -1,16 +1,16 @@
 airflow/providers/databricks/LICENSE,sha256=gXPVwptPlW1TJ4HSuG5OMPg-a3h43OGMkZRR1rpwfJA,10850
-airflow/providers/databricks/__init__.py,sha256=1iYKoYjdiEI3gbSdzOWztaFJUwgD8AKuHKP04iHxV8o,1497
+airflow/providers/databricks/__init__.py,sha256=D4S1V7H6S0R_iJ75oqoynUbmOYkMET00uTSUyPzvkzM,1499
 airflow/providers/databricks/exceptions.py,sha256=85RklmLOI_PnTzfXNIUd5fAu2aMMUhelwumQAX0wANE,1261
 airflow/providers/databricks/get_provider_info.py,sha256=qNMX4Lft-NItPhFewFBSCi8n0_ISid_MQeETKQ67vdo,5573
-airflow/providers/databricks/version_compat.py,sha256=aHg90_DtgoSnQvILFICexMyNlHlALBdaeWqkX3dFDug,1605
+airflow/providers/databricks/version_compat.py,sha256=j5PCtXvZ71aBjixu-EFTNtVDPsngzzs7os0ZQDgFVDk,1536
 airflow/providers/databricks/hooks/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
 airflow/providers/databricks/hooks/databricks.py,sha256=FIoiKWIc9AP3s8Av3Av9yleTg1kI0norwW5CAc6jTQc,28867
 airflow/providers/databricks/hooks/databricks_base.py,sha256=D7-_74QgQaZm1NfHKl_UOXbVAXRo2xjnOx_r1MI-rWI,34871
-airflow/providers/databricks/hooks/databricks_sql.py,sha256=fdxjjeR1u-1dSlbVEBYX0v3XAb8jTT74BVMi3mYY2OE,13092
+airflow/providers/databricks/hooks/databricks_sql.py,sha256=xTdi0JN-ZdsGe2XnCa8yBi-AINZUlyIVlP-5nb2d2T0,16964
 airflow/providers/databricks/operators/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvRQpt3YR7wjz_nceyF1IaI,787
 airflow/providers/databricks/operators/databricks.py,sha256=E8fgk3Z67uOTSvWvbF23Miv6EruSGOTdFvHn7pGVWp0,80138
 airflow/providers/databricks/operators/databricks_repos.py,sha256=m_72OnnU9df7UB-8SK2Tp5VjfNyjYeAnil3dCKs9SbA,13282
-airflow/providers/databricks/operators/databricks_sql.py,sha256=thBHpt9_LMLJZ0PN-eLCI3AaT8IFq3NAHLDWDFP-Jiw,17031
+airflow/providers/databricks/operators/databricks_sql.py,sha256=Ycp5mcb3uScQrognB2k8IeSR9oBx-Vnv6NEYGYuE800,17159
 airflow/providers/databricks/operators/databricks_workflow.py,sha256=9WNQR9COa90fbqb9qSzut34K9Z1S_ZdpNHAfIcuH454,14227
 airflow/providers/databricks/plugins/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/plugins/databricks_workflow.py,sha256=1UpsodBLRrTah9zBGBzfM7n1pdkzTo7yilt6QxASspQ,17460
@@ -21,7 +21,8 @@ airflow/providers/databricks/triggers/__init__.py,sha256=mlJxuZLkd5x-iq2SBwD3mvR
 airflow/providers/databricks/triggers/databricks.py,sha256=dSogx6GlcJfZ4CFhtlMeWs9sYFEYthP82S_U8-tM2Tk,9240
 airflow/providers/databricks/utils/__init__.py,sha256=9hdXHABrVpkbpjZgUft39kOFL2xSGeG4GEua0Hmelus,785
 airflow/providers/databricks/utils/databricks.py,sha256=s0qEr_DsFhKW4uUiq2VQbtqcj52isYIplPZsUcxGPrI,2862
-apache_airflow_providers_databricks-7.3.2rc1.dist-info/entry_points.txt,sha256=hjmZm3ab2cteTR4t9eE28oKixHwNIKtLCThd6sx3XRQ,227
-apache_airflow_providers_databricks-7.3.2rc1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
-apache_airflow_providers_databricks-7.3.2rc1.dist-info/METADATA,sha256=-vW0I-mfTB60WU9-Qk80dGC29S8C97ULEuGW8_GRA1s,6088
-apache_airflow_providers_databricks-7.3.2rc1.dist-info/RECORD,,
+airflow/providers/databricks/utils/openlineage.py,sha256=7fR3CPcOruHapsz1DOZ38QN3ZcAGDADNHPY28CzYCbg,13194
+apache_airflow_providers_databricks-7.4.0rc1.dist-info/entry_points.txt,sha256=hjmZm3ab2cteTR4t9eE28oKixHwNIKtLCThd6sx3XRQ,227
+apache_airflow_providers_databricks-7.4.0rc1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+apache_airflow_providers_databricks-7.4.0rc1.dist-info/METADATA,sha256=pR6Sl96lEQpHq-x1FYiLA_rNd6-R_e_BsGibC8qrFps,6475
+apache_airflow_providers_databricks-7.4.0rc1.dist-info/RECORD,,